picoVGA lib stripped and reduced clock to 240 for heavy emulators
This commit is contained in:
parent
df037ae74c
commit
447507e8c8
84 changed files with 332 additions and 13959 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -67,6 +67,8 @@
|
|||
VSYNC and HSYNC */
|
||||
#define VGA_COLORBASE 2
|
||||
#define VGA_SYNCBASE 14
|
||||
#define VGA_VSYNC 15
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -36,14 +36,17 @@ static int skip=0;
|
|||
#include "hardware/vreg.h"
|
||||
|
||||
int main(void) {
|
||||
vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// set_sys_clock_khz(125000, true);
|
||||
// set_sys_clock_khz(150000, true);
|
||||
// set_sys_clock_khz(133000, true);
|
||||
// set_sys_clock_khz(200000, true);
|
||||
// set_sys_clock_khz(210000, true);
|
||||
set_sys_clock_khz(230000, true);
|
||||
// set_sys_clock_khz(225000, true);
|
||||
set_sys_clock_khz(250000, true);
|
||||
// set_sys_clock_khz(250000, true);
|
||||
stdio_init_all();
|
||||
|
||||
#ifdef USE_VGA
|
||||
// tft.begin(VGA_MODE_400x240);
|
||||
tft.begin(VGA_MODE_320x240);
|
||||
|
|
@ -98,8 +101,10 @@ void emu_DrawVsync(void)
|
|||
{
|
||||
skip += 1;
|
||||
skip &= VID_FRAME_SKIP;
|
||||
volatile bool vb=vbl;
|
||||
while (vbl==vb) {};
|
||||
#ifdef USE_VGA
|
||||
tft.waitSync();
|
||||
//tft.waitSync();
|
||||
#else
|
||||
//volatile bool vb=vbl;
|
||||
//while (vbl==vb) {};
|
||||
|
|
|
|||
|
|
@ -36,14 +36,17 @@ static int skip=0;
|
|||
#include "hardware/vreg.h"
|
||||
|
||||
int main(void) {
|
||||
vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// set_sys_clock_khz(125000, true);
|
||||
// set_sys_clock_khz(150000, true);
|
||||
// set_sys_clock_khz(133000, true);
|
||||
// set_sys_clock_khz(200000, true);
|
||||
// set_sys_clock_khz(210000, true);
|
||||
set_sys_clock_khz(230000, true);
|
||||
// set_sys_clock_khz(225000, true);
|
||||
set_sys_clock_khz(250000, true);
|
||||
// set_sys_clock_khz(250000, true);
|
||||
stdio_init_all();
|
||||
|
||||
#ifdef USE_VGA
|
||||
// tft.begin(VGA_MODE_400x240);
|
||||
tft.begin(VGA_MODE_320x240);
|
||||
|
|
@ -97,10 +100,10 @@ void emu_DrawVsync(void)
|
|||
skip += 1;
|
||||
skip &= VID_FRAME_SKIP;
|
||||
#ifdef USE_VGA
|
||||
tft.waitSync();
|
||||
// tft.waitSync();
|
||||
#else
|
||||
// volatile bool vb=vbl;
|
||||
// while (vbl==vb) {};
|
||||
// volatile bool vb=vbl;
|
||||
// while (vbl==vb) {};
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -36,14 +36,17 @@ static int skip=0;
|
|||
|
||||
|
||||
int main(void) {
|
||||
vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// set_sys_clock_khz(125000, true);
|
||||
// set_sys_clock_khz(150000, true);
|
||||
// set_sys_clock_khz(133000, true);
|
||||
// set_sys_clock_khz(200000, true);
|
||||
// set_sys_clock_khz(210000, true);
|
||||
set_sys_clock_khz(230000, true);
|
||||
// set_sys_clock_khz(225000, true);
|
||||
set_sys_clock_khz(250000, true);
|
||||
// set_sys_clock_khz(250000, true);
|
||||
stdio_init_all();
|
||||
|
||||
#ifdef USE_VGA
|
||||
// tft.begin(VGA_MODE_400x240);
|
||||
tft.begin(VGA_MODE_320x240);
|
||||
|
|
@ -63,7 +66,7 @@ int main(void) {
|
|||
tft.fillScreenNoDma( RGBVAL16(0x00,0x00,0x00) );
|
||||
tft.startDMA();
|
||||
struct repeating_timer timer;
|
||||
add_repeating_timer_ms(15, repeating_timer_callback, NULL, &timer);
|
||||
add_repeating_timer_ms(5, repeating_timer_callback, NULL, &timer);
|
||||
}
|
||||
tft.waitSync();
|
||||
}
|
||||
|
|
@ -96,11 +99,13 @@ void emu_DrawVsync(void)
|
|||
{
|
||||
skip += 1;
|
||||
skip &= VID_FRAME_SKIP;
|
||||
#ifdef USE_VGA
|
||||
tft.waitSync();
|
||||
#else
|
||||
volatile bool vb=vbl;
|
||||
while (vbl==vb) {};
|
||||
#ifdef USE_VGA
|
||||
// tft.waitSync();
|
||||
#else
|
||||
// volatile bool vb=vbl;
|
||||
// while (vbl==vb) {};
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -35,13 +35,14 @@ static int skip=0;
|
|||
#include "hardware/vreg.h"
|
||||
|
||||
int main(void) {
|
||||
vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// set_sys_clock_khz(125000, true);
|
||||
// set_sys_clock_khz(150000, true);
|
||||
// set_sys_clock_khz(133000, true);
|
||||
// set_sys_clock_khz(200000, true);
|
||||
set_sys_clock_khz(200000, true);
|
||||
// set_sys_clock_khz(225000, true);
|
||||
set_sys_clock_khz(250000, true);
|
||||
// set_sys_clock_khz(240000, true);
|
||||
// set_sys_clock_khz(250000, true);
|
||||
stdio_init_all();
|
||||
#ifdef USE_VGA
|
||||
tft.begin(VGA_MODE_320x240);
|
||||
|
|
|
|||
|
|
@ -35,13 +35,14 @@ static int skip=0;
|
|||
#include "hardware/vreg.h"
|
||||
|
||||
int main(void) {
|
||||
vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// set_sys_clock_khz(125000, true);
|
||||
// set_sys_clock_khz(150000, true);
|
||||
// set_sys_clock_khz(133000, true);
|
||||
// set_sys_clock_khz(200000, true);
|
||||
set_sys_clock_khz(200000, true);
|
||||
// set_sys_clock_khz(225000, true);
|
||||
set_sys_clock_khz(250000, true);
|
||||
// set_sys_clock_khz(240000, true);
|
||||
// set_sys_clock_khz(250000, true);
|
||||
stdio_init_all();
|
||||
#ifdef USE_VGA
|
||||
tft.begin(VGA_MODE_320x240);
|
||||
|
|
@ -94,11 +95,13 @@ void emu_DrawVsync(void)
|
|||
{
|
||||
skip += 1;
|
||||
skip &= VID_FRAME_SKIP;
|
||||
#ifdef USE_VGA
|
||||
tft.waitSync();
|
||||
#else
|
||||
volatile bool vb=vbl;
|
||||
while (vbl==vb) {};
|
||||
#ifdef USE_VGA
|
||||
// tft.waitSync();
|
||||
#else
|
||||
// volatile bool vb=vbl;
|
||||
// while (vbl==vb) {};
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -35,13 +35,14 @@ static int skip=0;
|
|||
#include "hardware/vreg.h"
|
||||
|
||||
int main(void) {
|
||||
vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// set_sys_clock_khz(125000, true);
|
||||
// set_sys_clock_khz(150000, true);
|
||||
// set_sys_clock_khz(133000, true);
|
||||
// set_sys_clock_khz(200000, true);
|
||||
set_sys_clock_khz(200000, true);
|
||||
// set_sys_clock_khz(225000, true);
|
||||
set_sys_clock_khz(150000, true);
|
||||
// set_sys_clock_khz(240000, true);
|
||||
// set_sys_clock_khz(250000, true);
|
||||
stdio_init_all();
|
||||
#ifdef USE_VGA
|
||||
// tft.begin(VGA_MODE_400x240);
|
||||
|
|
|
|||
|
|
@ -569,11 +569,6 @@ int ExecZ80(register Z80 *R,register int RunCycles)
|
|||
asm volatile("nop");
|
||||
asm volatile("nop");
|
||||
|
||||
asm volatile("nop");
|
||||
asm volatile("nop");
|
||||
asm volatile("nop");
|
||||
asm volatile("nop");
|
||||
asm volatile("nop");
|
||||
#ifndef USE_VGA
|
||||
asm volatile("nop");
|
||||
asm volatile("nop");
|
||||
|
|
|
|||
|
|
@ -37,14 +37,17 @@ static int skip=0;
|
|||
#include "hardware/vreg.h"
|
||||
|
||||
int main(void) {
|
||||
vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// set_sys_clock_khz(125000, true);
|
||||
// set_sys_clock_khz(150000, true);
|
||||
// set_sys_clock_khz(133000, true);
|
||||
// set_sys_clock_khz(200000, true);
|
||||
// set_sys_clock_khz(210000, true);
|
||||
set_sys_clock_khz(230000, true);
|
||||
// set_sys_clock_khz(225000, true);
|
||||
set_sys_clock_khz(250000, true);
|
||||
// set_sys_clock_khz(250000, true);
|
||||
stdio_init_all();
|
||||
|
||||
#ifdef USE_VGA
|
||||
tft.begin(VGA_MODE_320x240);
|
||||
#else
|
||||
|
|
@ -63,7 +66,7 @@ int main(void) {
|
|||
tft.fillScreenNoDma( RGBVAL16(0x00,0x00,0x00) );
|
||||
tft.startDMA();
|
||||
struct repeating_timer timer;
|
||||
add_repeating_timer_ms(20, repeating_timer_callback, NULL, &timer);
|
||||
add_repeating_timer_ms(5, repeating_timer_callback, NULL, &timer);
|
||||
}
|
||||
tft.waitSync();
|
||||
}
|
||||
|
|
@ -94,11 +97,13 @@ void emu_DrawVsync(void)
|
|||
{
|
||||
skip += 1;
|
||||
skip &= VID_FRAME_SKIP;
|
||||
volatile bool vb=vbl;
|
||||
while (vbl==vb) {};
|
||||
#ifdef USE_VGA
|
||||
tft.waitSync();
|
||||
// tft.waitSync();
|
||||
#else
|
||||
// volatile bool vb=vbl;
|
||||
// while (vbl==vb) {};
|
||||
// volatile bool vb=vbl;
|
||||
// while (vbl==vb) {};
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -35,13 +35,14 @@ static int skip=0;
|
|||
#include "hardware/vreg.h"
|
||||
|
||||
int main(void) {
|
||||
vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// set_sys_clock_khz(125000, true);
|
||||
// set_sys_clock_khz(150000, true);
|
||||
// set_sys_clock_khz(133000, true);
|
||||
// set_sys_clock_khz(200000, true);
|
||||
set_sys_clock_khz(200000, true);
|
||||
// set_sys_clock_khz(225000, true);
|
||||
set_sys_clock_khz(150000, true);
|
||||
// set_sys_clock_khz(240000, true);
|
||||
// set_sys_clock_khz(250000, true);
|
||||
stdio_init_all();
|
||||
#ifdef USE_VGA
|
||||
// tft.begin(VGA_MODE_400x240);
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ static void core1_func()
|
|||
//VgaTerm(); // terminate
|
||||
}
|
||||
else
|
||||
VgaInit(v);
|
||||
VgaInit(v,(u8*)framebuffer,320,240,320);
|
||||
__dmb();
|
||||
VgaVmodeReq = NULL;
|
||||
}
|
||||
|
|
@ -133,7 +133,7 @@ vga_error_t VGA_T4::begin(vga_mode_t mode)
|
|||
sem_init(&core1_initted, 0, 1);
|
||||
|
||||
multicore_launch_core1(core1_func);
|
||||
vmode = Video(DEV_VGA, RES_QVGA, FORM_8BIT, framebuffer);
|
||||
vmode = Video(DEV_VGA, RES_QVGA);
|
||||
VgaInitReql(vmode);
|
||||
|
||||
// wait for initialization of audio to be complete
|
||||
|
|
|
|||
|
|
@ -1,142 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// Canvas
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#ifndef _CANVAS_H
|
||||
#define _CANVAS_H
|
||||
|
||||
#define DRAW_HWINTER 1 // 1=use hardware interpolator to draw images
|
||||
|
||||
// canvas format
|
||||
// Note: do not use enum, symbols could not be used by the preprocessor
|
||||
#define CANVAS_8 0 // 8-bit pixels
|
||||
#define CANVAS_4 1 // 4-bit pixels
|
||||
#define CANVAS_2 2 // 2-bit pixels
|
||||
#define CANVAS_1 3 // 1-bit pixels
|
||||
#define CANVAS_PLANE2 4 // 4 colors on 2 planes
|
||||
#define CANVAS_ATTRIB8 5 // 2x4 bit color attributes per 8x8 pixel sample
|
||||
// draw functions: bit 0..3 = draw color
|
||||
// bit 4 = draw color is background color
|
||||
|
||||
// canvas descriptor
|
||||
typedef struct {
|
||||
u8* img; // image data
|
||||
u8* img2; // image data 2 (2nd plane of CANVAS_PLANE2, attributes of CANVAS_ATTRIB8)
|
||||
int w; // width
|
||||
int h; // height
|
||||
int wb; // pitch (bytes between lines)
|
||||
u8 format; // canvas format CANVAS_*
|
||||
} sCanvas;
|
||||
|
||||
// Draw rectangle
|
||||
void DrawRect(sCanvas* canvas, int x, int y, int w, int h, u8 col);
|
||||
|
||||
// Draw frame
|
||||
void DrawFrame(sCanvas* canvas, int x, int y, int w, int h, u8 col);
|
||||
|
||||
// clear canvas (fill with black color)
|
||||
void DrawClear(sCanvas* canvas);
|
||||
|
||||
// Draw point
|
||||
void DrawPoint(sCanvas* canvas, int x, int y, u8 col);
|
||||
|
||||
// Draw line
|
||||
void DrawLine(sCanvas* canvas, int x1, int y1, int x2, int y2, u8 col);
|
||||
|
||||
// Draw filled circle
|
||||
// x0, y0 ... coordinate of center
|
||||
// r ... radius
|
||||
// col ... color
|
||||
// col with CANVAS_ATTRIB8 format: bit 0..3 = draw color, bit 4 = draw color is background color
|
||||
// mask ... mask of used octants (0xff = 255 = draw whole circle)
|
||||
// . B2|B1 .
|
||||
// B3 . | . B0
|
||||
// ------o------
|
||||
// B4 . | . B7
|
||||
// . B5|B6 .
|
||||
void DrawFillCircle(sCanvas* canvas, int x0, int y0, int r, u8 col, u8 mask=0xff);
|
||||
|
||||
// Draw circle
|
||||
// x0, y0 ... coordinate of center
|
||||
// r ... radius
|
||||
// col ... color
|
||||
// col with CANVAS_ATTRIB8 format: bit 0..3 = draw color, bit 4 = draw color is background color
|
||||
// mask ... mask of used octants (0xff = 255 = draw whole circle)
|
||||
// . B2|B1 .
|
||||
// B3 . | . B0
|
||||
// ------o------
|
||||
// B4 . | . B7
|
||||
// . B5|B6 .
|
||||
void DrawCircle(sCanvas* canvas, int x0, int y0, int r, u8 col, u8 mask=0xff);
|
||||
|
||||
// Draw text (transparent background)
|
||||
// font = pointer to 1-bit font
|
||||
void DrawText(sCanvas* canvas, const char* text, int x, int y, u8 col,
|
||||
const void* font, int fontheight=8, int scalex=1, int scaley=1);
|
||||
|
||||
// Draw text with background
|
||||
// font = pointer to 1-bit font
|
||||
void DrawTextBg(sCanvas* canvas, const char* text, int x, int y, u8 col, u8 bgcol,
|
||||
const void* font, int fontheight=8, int scalex=1, int scaley=1);
|
||||
|
||||
// Draw image
|
||||
void DrawImg(sCanvas* canvas, sCanvas* src, int xd, int yd, int xs, int ys, int w, int h);
|
||||
|
||||
// Draw image with transparency (source and destination must have same format, col = transparency key color)
|
||||
// CANVAS_ATTRIB8 format replaced by DrawImg function
|
||||
void DrawBlit(sCanvas* canvas, sCanvas* src, int xd, int yd, int xs, int ys, int w, int h, u8 col);
|
||||
|
||||
// DrawImgMat mode
|
||||
enum {
|
||||
DRAWIMG_WRAP, // wrap image
|
||||
DRAWIMG_NOBORDER, // no border (transparent border)
|
||||
DRAWIMG_CLAMP, // clamp image (use last pixel as border)
|
||||
DRAWING_COLOR, // color border
|
||||
DRAWIMG_TRANSP, // transparent image with key color
|
||||
DRAWIMG_PERSP, // perspective floor
|
||||
};
|
||||
|
||||
// draw 8-bit image with 2D transformation matrix
|
||||
// canvas ... destination canvas
|
||||
// src ... source canvas with image
|
||||
// x ... destination coordinate X
|
||||
// y ... destination coordinate Y
|
||||
// w ... destination width
|
||||
// h ... destination height
|
||||
// m ... transformation matrix (should be prepared using PrepDrawImg or PrepDrawPersp function)
|
||||
// mode ... draw mode DRAWIMG_*
|
||||
// color ... key or border color
|
||||
// Note for wrap and perspective modes: width and height of the source image must be a power of 2!
|
||||
void DrawImgMat(sCanvas* canvas, const sCanvas* src, int x, int y, int w, int h,
|
||||
const class cMat2Df* m, u8 mode, u8 color);
|
||||
|
||||
// draw tile map using perspective projection
|
||||
// canvas ... destination canvas
|
||||
// src ... source canvas with column of 8-bit square tiles (width = tile size, must be power of 2)
|
||||
// map ... byte map of tile indices
|
||||
// mapwbits ... number of bits of map width (number of tiles; width must be power of 2)
|
||||
// maphbits ... number of bits of map height (number of tiles; height must be power of 2)
|
||||
// tilebits ... number of bits of tile size (e.g. 5 = tile 32x32 pixel)
|
||||
// x ... destination coordinate X
|
||||
// y ... destination coordinate Y
|
||||
// w ... destination width
|
||||
// h ... destination height
|
||||
// mat ... transformation matrix (should be prepared using PrepDrawPersp function)
|
||||
// horizon ... horizon offset (0=do not use perspective projection)
|
||||
void DrawTileMap(sCanvas* canvas, const sCanvas* src, const u8* map, int mapwbits, int maphbits,
|
||||
int tilebits, int x, int y, int w, int h, const cMat2Df* mat, u8 horizon);
|
||||
|
||||
// draw image line interpolated
|
||||
// canvas = destination canvas (8-bit pixel format)
|
||||
// src = source canvas (source image in 8-bit pixel format)
|
||||
// xd,yd = destination coordinates
|
||||
// xs,ys = source coordinates
|
||||
// wd = destination width
|
||||
// ws = source width
|
||||
// Overflow in X direction is not checked!
|
||||
void DrawImgLine(sCanvas* canvas, sCanvas* src, int xd, int yd, int xs, int ys, int wd, int ws);
|
||||
|
||||
#endif // _CANVAS_H
|
||||
|
|
@ -1,198 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA common definitions of C and ASM
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "vga_config.h" // VGA configuration
|
||||
|
||||
#define LAYERS_MAX 4 // max. number of layers (should be 4)
|
||||
|
||||
#define BLACK_MAX MAXX // size of buffer with black color (used to clear rest of unused line)
|
||||
|
||||
// VGA PIO program
|
||||
#define BASE_OFFSET 17 // offset of base layer program
|
||||
#define LAYER_OFFSET 0 // offset of overlapped layer program
|
||||
|
||||
// layer program
|
||||
#define LAYERPROG_BASE 0 // program of base layer (overlapped layers are OFF)
|
||||
#define LAYERPROG_KEY 1 // layer with key color
|
||||
#define LAYERPROG_BLACK 2 // layer with black key color
|
||||
#define LAYERPROG_WHITE 3 // layer with white key color
|
||||
#define LAYERPROG_MONO 4 // layer with mono pattern or simple color
|
||||
#define LAYERPROG_RLE 5 // layer with RLE compression
|
||||
|
||||
#define LAYERPROG_NUM 6 // number of layer programs
|
||||
|
||||
// layer mode (CPP = clock cycles per pixel)
|
||||
// Control buffer: 16 bytes
|
||||
// Data buffer: 4 bytes
|
||||
// fast sprites can be up Control buffer: width*2 bytes
|
||||
// sprites Data buffer: width bytes
|
||||
#define LAYERMODE_BASE 0 // base layer
|
||||
#define LAYERMODE_KEY 1 // layer with key color
|
||||
#define LAYERMODE_BLACK 2 // layer with black key color
|
||||
#define LAYERMODE_WHITE 3 // layer with white key color
|
||||
#define LAYERMODE_MONO 4 // layer with mono pattern
|
||||
#define LAYERMODE_COLOR 5 // layer with simple color
|
||||
#define LAYERMODE_RLE 6 // layer with RLE compression
|
||||
#define LAYERMODE_SPRITEKEY 7 // layer with sprites with key color
|
||||
#define LAYERMODE_SPRITEBLACK 8 // layer with sprites with black key color
|
||||
#define LAYERMODE_SPRITEWHITE 9 // layer with sprites with white key color
|
||||
#define LAYERMODE_FASTSPRITEKEY 10 // layer with fast sprites with key color
|
||||
#define LAYERMODE_FASTSPRITEBLACK 11 // layer with fast sprites with black key color
|
||||
#define LAYERMODE_FASTSPRITEWHITE 12 // layer with fast sprites with white key color
|
||||
#define LAYERMODE_PERSPKEY 13 // layer with key color and image with transformation matrix
|
||||
#define LAYERMODE_PERSPBLACK 14 // layer with black key color and image with transformation matrix
|
||||
#define LAYERMODE_PERSPWHITE 15 // layer with white key color and image with transformation matrix
|
||||
#define LAYERMODE_PERSP2KEY 16 // layer with key color and double-pixel image with transformation matrix
|
||||
#define LAYERMODE_PERSP2BLACK 17 // layer with black key color and double-pixel image with transformation matrix
|
||||
#define LAYERMODE_PERSP2WHITE 18 // layer with white key color and double-pixel image with transformation matrix
|
||||
|
||||
#define LAYERMODE_NUM 19 // number of overlapped layer modes
|
||||
|
||||
// Structure of sprite sSprite (on change update structure sSprite in vga_layer.h)
|
||||
#define SSPRITE_IMG 0 // u8* img; // pointer to image data
|
||||
#define SSPRITE_X0 4 // u8* x0; // pointer to pixel offset of start of lines/4 (used with fast sprites)
|
||||
#define SSPRITE_W0 8 // u8* w0; // pointer to pixel length of lines/4 (used with fast sprites)
|
||||
#define SSPRITE_KEYCOL 12 // u32 keycol; // key color
|
||||
#define SSPRITE_X 16 // s16 x; // sprite X-coordinate on the screen
|
||||
#define SSPRITE_Y 18 // s16 y; // sprite Y-coordinate on the screen
|
||||
#define SSPRITE_W 20 // u16 w; // sprite width
|
||||
#define SSPRITE_H 22 // u16 h; // sprite height
|
||||
#define SSPRITE_WB 24 // u16 wb; // sprite pitch (number of bytes between lines)
|
||||
// u16 res; // ...reserved, structure align
|
||||
#define SSPRITE_SIZE 28 // size of sSprite structure
|
||||
|
||||
// Structure of layer screen sLayer (on change update structure sLayer in vga_layer.h)
|
||||
#define SLAYER_IMG 0 // const u8* img; // pointer to image in current layer format, or sprite list
|
||||
#define SLAYER_PAR 4 // const void* par; // additional parameter (RLE index table, transformation matrix)
|
||||
#define SLAYER_INIT 8 // u32 init; // init word sent on start of scanline
|
||||
#define SLAYER_KEYCOL 12 // u32 keycol; // key color
|
||||
#define SLAYER_TRANS 16 // u16 trans; // trans count
|
||||
#define SLAYER_X 18 // s16 x; // start X coordinate
|
||||
#define SLAYER_Y 20 // s16 y; // start Y coordinate
|
||||
#define SLAYER_W 22 // u16 w; // width in pixels
|
||||
#define SLAYER_H 24 // u16 h; // height
|
||||
#define SLAYER_WB 26 // u16 wb; // image width in bytes (pitch of lines)
|
||||
#define SLAYER_MODE 28 // u8 mode; // layer mode
|
||||
#define SLAYER_HORIZ 29 // s8 horiz; // horizon of perspective projection/4 (only with LAYERMODE_PERSP* modes, 0=no perspective, <0 ceiling)
|
||||
#define SLAYER_XBITS 30 // u8 xbits; // number of bits of width of source image (only with LAYERMODE_PERSP* modes)
|
||||
#define SLAYER_YBITS 31 // u8 ybits; // number of bits of height of source image (only with LAYERMODE_PERSP* modes)
|
||||
#define SLAYER_SPRITENUM 32 // u16 spritenum; // number of sprites
|
||||
#define SLAYER_ON 34 // Bool on; // layer is ON
|
||||
#define SLAYER_CPP 35 // u8 cpp; // current clock pulses per pixel (used to calculate X coordinate)
|
||||
#define SLAYER_SIZE 36 // size of sLayer structure
|
||||
|
||||
// Structure of video segment sSegm (on change update structure sSegm in vga_screen.h)
|
||||
#define SSEGM_WIDTH 0 // u16 width; // width of this video segment in pixels (must be multiple of 4, 0=inactive segment)
|
||||
#define SSEGM_WB 2 // u16 wb; // pitch - number of bytes between lines
|
||||
#define SSEGM_OFFX 4 // s16 offx; // display offset at X direction (must be multiple of 4)
|
||||
#define SSEGM_OFFY 6 // s16 offy; // display offset at Y direction
|
||||
#define SSEGM_WRAPX 8 // u16 wrapx; // wrap width in X direction (number of pixels, must be a multiple of 4 and > 0)
|
||||
// text modes: wrapx must be a multiple of 8
|
||||
#define SSEGM_WRAPY 10 // u16 wrapy; // wrap width in Y direction (number of lines, cannot be 0)
|
||||
#define SSEGM_DATA 12 // const void* data; // pointer to video buffer with image data
|
||||
#define SSEGM_FORM 16 // u8 form; // graphics format GF_*
|
||||
#define SSEGM_DBLY 17 // bool dbly; // double Y (2 scanlines per 1 image line)
|
||||
#define SSEGM_PAR3 18 // u16 par3; // SSEGM_PAR3 parameter 3
|
||||
#define SSEGM_PAR 20 // u32 par; // parameter 1: color, pointer to palettes, tile source, font
|
||||
#define SSEGM_PAR2 24 // u32 par2; // parameter 2
|
||||
#define SSEGM_SIZE 28 // size of sSegm structure
|
||||
|
||||
// Structure of video strip sStrip (on change update structure sStrip in vga_screen.h)
|
||||
#define SSTRIP_HEIGHT 0 // u16 height; // height of this strip in number of scanlines
|
||||
#define SSTRIP_NUM 2 // u16 num; // number of video segments
|
||||
#define SSTRIP_SEG 4 // sSegm seg[SEGMAX];
|
||||
#define SSTRIP_SIZE (4+SSEGM_SIZE*SEGMAX) // size of sStrip structure (= 4 + 28*8 = 228 bytes)
|
||||
|
||||
// Structure of video screen sScreen (on change update structure sScreen in vga_screen.h)
|
||||
#define SSCREEN_NUM 0 // u16 num; // number of video strips
|
||||
#define SSCREEN_BACKUP 2 // u16 num_backup; // backup number of video strips during display OFF
|
||||
#define SSCREEN_STRIP 4 // sStrip strip[STRIPMAX]; // list of video strips
|
||||
#define SSCREEN_SIZE (4+SSTRIP_SIZE*STRIPMAX) // size of sScreen structure (= 4 + 228*8 = 1828 bytes)
|
||||
|
||||
// --- graphics formats
|
||||
// There are 3 groups of formats - separated for internal reasons; do not mix them.
|
||||
|
||||
// 1st group of formats - rendered specially
|
||||
#define GF_COLOR 0 // simple color (par=color pattern 4-pixels even line, par2=color pattern 4-pixels odd line)
|
||||
// Data buffer: width bytes (320 pixels: 320 bytes)
|
||||
// Control buffer: 8 bytes
|
||||
|
||||
// 2nd group of formats - rendering into control buffer cbuf
|
||||
#define GF_GRAPH8 1 // native 8-bit graphics (X1Y1R2G2B2) - fast, transfers "as is" to PIO
|
||||
// (num = number of pixels/4 = number of bytes/4)
|
||||
// Control buffer: 8 bytes (320 pixels: 8 bytes)
|
||||
#define GF_TILE 2 // tiles (par = tile table with one column of tiles,
|
||||
// par2 = tile height, par3 = tile width as multiple of 4)
|
||||
// Control buffer: width/tile width*8 bytes (320 pixels of 32x32: 80 bytes)
|
||||
#define GF_TILE2 3 // alternate tiles (par = tile table with one row of tiles,
|
||||
// par2 = LOW tile height, HIGH tile width bytes,
|
||||
// par3 = tile width as multiple of 4)
|
||||
// Control buffer: width/tile width*8 bytes (320 pixels of 32x32: 80 bytes)
|
||||
#define GF_PROGRESS 4 // horizontal progress indicator (data = values 0..255 of 4-pixels in rows,
|
||||
// par = scanline gradient < data, par2 = scanline gradient >= data)
|
||||
// Control buffer: 16 bytes
|
||||
#define GF_GRAD1 5 // gradient with 1 line
|
||||
// Control buffer: 8 bytes (320 pixels: 8 bytes)
|
||||
#define GF_GRAD2 6 // gradient with 2 lines
|
||||
// Control buffer: 8 bytes (320 pixels: 8 bytes)
|
||||
|
||||
#define GF_GRP2MIN GF_GRAPH8 // 2nd group minimal format
|
||||
#define GF_GRP2MAX GF_GRAD2 // 2nd group maximal format
|
||||
|
||||
// 3rd group of formats - rendering into data buffer dbuf
|
||||
// Control buffer: 8 bytes
|
||||
// Data buffer: width bytes
|
||||
#define GF_GRAPH4 7 // 4-bit graphics (num = number of pixels/4 = number of bytes/2;
|
||||
// par = pointer to 16-color palette translation table)
|
||||
#define GF_GRAPH2 8 // 2-bit graphics (num = number of pixels/4 = number of bytes,
|
||||
// par = pointer to 4-color palette translation table)
|
||||
#define GF_GRAPH1 9 // 1-bit graphics (num = number of pixels/8 = number of bytes,
|
||||
// par = 2 colors of palettes)
|
||||
#define GF_MTEXT 10 // 8-pixel mono text (num = number of characters, font is 8-bit width,
|
||||
// par = pointer to 1-bit font, par2 = 2 colors of palettes)
|
||||
#define GF_ATEXT 11 // 8-pixel attribute text, character + 2x4 bit attributes
|
||||
// (num = number of characters, font is 8-bit width,
|
||||
// par = pointer to 1-bit font, par2 = pointer to 16 colors of palettes)
|
||||
#define GF_FTEXT 12 // 8-pixel foreground color text, character + foreground color
|
||||
// (num = number of characters, font is 8-bit width,
|
||||
// par = pointer to 1-bit font, par2 = background color)
|
||||
#define GF_CTEXT 13 // 8-pixel color text, character + background color + foreground color
|
||||
// (num = number of characters, font is 8-bit width,
|
||||
// par = pointer to 1-bit font)
|
||||
#define GF_GTEXT 14 // 8-pixel gradient text (par = pointer to 1-bit font, par2 = pointer to color array)
|
||||
#define GF_DTEXT 15 // 8-pixel double gradient text (par = pointer to 1-bit font, par2 = pointer to color array)
|
||||
#define GF_LEVEL 16 // level graph (data=samples 0..255, par = 2 colors of palettes, par2 = Y zero level 0..255)
|
||||
#define GF_LEVELGRAD 17 // level gradient graph (data = samples 0..255, par = scanline gradient < data, par2 = scanline gradient >= data)
|
||||
#define GF_OSCIL 18 // oscilloscope pixel graph (data=samples 0..255, par = 2 colors of palettes, par2 = height of pixels - 1)
|
||||
#define GF_OSCLINE 19 // oscilloscope line graph (data=samples 0..255, par = 2 colors of palettes)
|
||||
#define GF_PLANE2 20 // 4 colors on 2 graphic planes (data=graphic, par=offset of 2nd graphic plane,
|
||||
// par2 = pointer to 4-color palette translation table)
|
||||
#define GF_ATTRIB8 21 // 2x4 bit color attribute per 8x8 pixel sample (data=mono graphic, par=offset of color attributes,
|
||||
// par2 = pointer to 16-color palette table)
|
||||
#define GF_GRAPH8MAT 22 // 8-bit graphics with 2D matrix transformation, using hardware interpolator inter1 (inter1 state is not saved during interrupt)
|
||||
// (data=image, par=pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL)),
|
||||
// par2 LOW=number of bits of image width, par2 HIGH=number of bits of image height)
|
||||
#define GF_GRAPH8PERSP 23 // 8-bit graphics with perspective, using hardware interpolator inter1 (inter1 state is not saved during interrupt)
|
||||
// (data=image, par=pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL)),
|
||||
// par2 LOW=number of bits of image width, par2 HIGH=number of bits of image height,
|
||||
// par3=horizon offset)
|
||||
#define GF_TILEPERSP 24 // tiles with perspective, using hardware interpolators inter0 and inter1 (their state is not saved during interrupt)
|
||||
// (data=tile map, par=one column of tiles, par2=pointer to integer matrix,
|
||||
// wb LOW=number of bits of map width, wb HIGH=number of bits of map height,
|
||||
// par3 LOW=number of bits of tile size, par3 HIGH=horizon offset/4 or 0=no perspective or <0=ceiling,
|
||||
// wrapy=segment height)
|
||||
#define GF_TILEPERSP15 25 // tiles with perspective, 1.5 pixels (parameters as GF_TILEPERSP)
|
||||
#define GF_TILEPERSP2 26 // tiles with perspective, double pixels (parameters as GF_TILEPERSP)
|
||||
#define GF_TILEPERSP3 27 // tiles with perspective, triple pixels (parameters as GF_TILEPERSP)
|
||||
#define GF_TILEPERSP4 28 // tiles with perspective, quadruple pixels (parameters as GF_TILEPERSP)
|
||||
|
||||
#define GF_GRP3MIN GF_GRAPH4 // 3rd group minimal format
|
||||
#define GF_GRP3MAX GF_TILEPERSP4 // 3rd group maximal format
|
||||
|
||||
|
||||
#define FRACT 12 // number of bits of fractional part of fractint number (use max. 13, min. 8)
|
||||
#define FRACTMUL (1<<FRACT)
|
||||
|
|
@ -3,6 +3,10 @@
|
|||
//
|
||||
// Global common definitions
|
||||
//
|
||||
// file derived from the PicoVGA project
|
||||
// https://github.com/Panda381/PicoVGA
|
||||
// by Miroslav Nemecek
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
|
@ -47,8 +51,10 @@ typedef unsigned char Bool;
|
|||
|
||||
// align array to 4-bytes
|
||||
#define ALIGNED __attribute__((aligned(4)))
|
||||
#define ALIGN4(x) ((x) & ~3)
|
||||
|
||||
#define LED_PIN 25
|
||||
// swap bytes of command
|
||||
#define BYTESWAP(n) ((((n)&0xff)<<24)|(((n)&0xff00)<<8)|(((n)&0xff0000)>>8)|(((n)&0xff000000)>>24))
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Constants
|
||||
|
|
@ -96,8 +102,6 @@ typedef unsigned char Bool;
|
|||
#define PI 3.14159265358979324
|
||||
#define PI2 (3.14159265358979324*2)
|
||||
|
||||
//extern const ALIGNED u8 FontBoldB8x16[4096];
|
||||
|
||||
#define VGA_RGB(r,g,b) ( (((r>>5)&0x07)<<5) | (((g>>5)&0x07)<<2) | (((b>>6)&0x3)<<0) )
|
||||
|
||||
|
||||
|
|
@ -125,12 +129,8 @@ typedef unsigned char Bool;
|
|||
|
||||
|
||||
// PicoVGA includes
|
||||
#include "define.h" // common definitions of C and ASM
|
||||
#include "canvas.h" // canvas
|
||||
#include "vga_config.h" // VGA configuration
|
||||
#include "vga_vmode.h" // VGA videomodes
|
||||
#include "vga_layer.h" // VGA layers
|
||||
#include "vga_screen.h" // VGA screen layout
|
||||
#include "vga_pal.h" // VGA palette
|
||||
#include "vga.h" // VGA output
|
||||
#include "picovga.pio.h" // PIO
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,11 @@
|
|||
|
||||
; ============================================================================
|
||||
; VGA output - base layer (15 instructions)
|
||||
;
|
||||
; file derived from the PicoVGA project
|
||||
; https://github.com/Panda381/PicoVGA
|
||||
; by Miroslav Nemecek
|
||||
;
|
||||
; ============================================================================
|
||||
; Control word of "dark" command (left shift):
|
||||
; - bit 0..7 (8 bits) output color (set to 0 if not used)
|
||||
|
|
@ -60,218 +65,4 @@ public extra2:
|
|||
; wrap jump to instruction out pc,5
|
||||
.wrap
|
||||
|
||||
; ============================================================================
|
||||
; VGA output - layer with key color (13 instructions)
|
||||
; ============================================================================
|
||||
; Control word (left shift):
|
||||
; - bit 0..10 (11 bits) number of pixels - 1 (number of pixels must be multiple of 4)
|
||||
; - bit 11..18 (8 bits) key color
|
||||
; - bit 19..31 (13 bits) start delay D = clock cycles - 7 between irq and first pixel
|
||||
; Clocks per pixel: minimum 6, maximum 37.
|
||||
|
||||
.program keylayer
|
||||
.origin 0 ; must load at offset 0 (LAYER_OFF)
|
||||
|
||||
; idle wait
|
||||
.wrap_target
|
||||
public idle:
|
||||
pull block ; [1] idle wait
|
||||
|
||||
public entry:
|
||||
wait 0 irq 4 ; [1] wait for IRQ sync goes 0
|
||||
out x,13 ; [1] get length of delay - 7
|
||||
layer_wait:
|
||||
jmp x--,layer_wait ; [1] delay loop
|
||||
out y,8 ; [1] get key color
|
||||
out x,11 ; [1] get number of pixels-1
|
||||
layer_loop:
|
||||
mov isr,x ; [1] save pixel counter into ISR
|
||||
out x,8 ; [1] get output pixel
|
||||
jmp x!=y,layer_2 ; [1] jump if pixel is not transparent
|
||||
jmp layer_3 ; [1] jump to end of loop
|
||||
layer_2:
|
||||
mov pins,x ; [1] output pixel to pins
|
||||
layer_3:
|
||||
public extra1:
|
||||
mov x,isr [0] ; [1+CPP-6] return pixel counter (set extra wait CPP-6)
|
||||
jmp x--,layer_loop ; [1] loop next pixel
|
||||
; wrap jump to idle
|
||||
.wrap
|
||||
|
||||
; ============================================================================
|
||||
; VGA output - layer with black key color (11 instructions)
|
||||
; ============================================================================
|
||||
; Control word (left shift):
|
||||
; - bit 0..15 (16 bits) number of pixels - 1 (number of pixels must be multiple of 4)
|
||||
; - bit 16..31 (16 bits) start delay D = clock cycles - 5 between irq and first pixel
|
||||
; Cannot display black pixel (it is used as transparency)
|
||||
; Clocks per pixel: minimum 4, maximum 34.
|
||||
|
||||
.program blacklayer
|
||||
.origin 0 ; must load at offset 0 (LAYER_OFF)
|
||||
|
||||
; idle wait
|
||||
.wrap_target
|
||||
public idle:
|
||||
pull block ; [1] idle wait
|
||||
|
||||
public entry:
|
||||
wait 0 irq 4 ; [1] wait for IRQ sync goes 0
|
||||
out x,16 ; [1] get length of delay - 5
|
||||
layer_wait:
|
||||
jmp x--,layer_wait ; [1] delay loop
|
||||
out x,16 ; [1] get number of pixels-1
|
||||
layer_loop:
|
||||
out y,8 ; [1] get output pixel
|
||||
jmp !y,layer_2 ; [1] jump if pixel is transparent (color = 0)
|
||||
mov pins,y ; [1] output pixel to pins
|
||||
public extra1:
|
||||
jmp x--,layer_loop [0] ; [1+CPP-4] loop next pixel (set extra wait CPP-4)
|
||||
jmp idle ; [1] go idle
|
||||
layer_2:
|
||||
public extra2:
|
||||
jmp x--,layer_loop [0] ; [1+CPP-3] loop next pixel (set extra wait CPP-3)
|
||||
; wrap jump to idle
|
||||
.wrap
|
||||
|
||||
; ============================================================================
|
||||
; VGA output - layer with white key color (10 instructions)
|
||||
; ============================================================================
|
||||
; Control word (left shift):
|
||||
; - bit 0..15 (16 bits) number of pixels - 1 (number of pixels must be multiple of 4)
|
||||
; - bit 16..31 (16 bits) start delay D = clock cycles - 5 between irq and first pixel
|
||||
; Cannot display white pixel (it is used as transparency). Source pixels must be incremented + 1.
|
||||
; Clocks per pixel: minimum 4, maximum 35.
|
||||
|
||||
.program whitelayer
|
||||
.origin 0 ; must load at offset 0 (LAYER_OFF)
|
||||
|
||||
; idle wait
|
||||
.wrap_target
|
||||
public idle:
|
||||
pull block ; [1] idle wait
|
||||
|
||||
public entry:
|
||||
wait 0 irq 4 ; [1] wait for IRQ sync goes 0
|
||||
out x,16 ; [1] get length of delay - 5
|
||||
layer_wait:
|
||||
jmp x--,layer_wait ; [1] delay loop
|
||||
out x,16 ; [1] get number of pixels-1
|
||||
layer_loop:
|
||||
out y,8 ; [1] get output pixel
|
||||
jmp y--,layer_2 ; [1] jump if pixel is not transparent (color != 0)
|
||||
jmp layer_3 ; [1] jump to end of loop
|
||||
layer_2:
|
||||
mov pins,y ; [1] output pixel to pins
|
||||
public extra1:
|
||||
layer_3:
|
||||
jmp x--,layer_loop [0] ; [1+CPP-4] loop next pixel (set extra wait CPP-4)
|
||||
; wrap jump to idle
|
||||
.wrap
|
||||
|
||||
; ============================================================================
|
||||
; VGA output - layer with mono or color pattern (16 instructions)
|
||||
; ============================================================================
|
||||
; Control word (left shift):
|
||||
; - bit 0 (1 bit) flag 0=use color opaque mode, 1=use mono transparent mode
|
||||
; - bit 1..11 (11 bits) number of pixels - 1 (number of pixels must be multiple of 32 in mono, or 4 in color)
|
||||
; - bit 12..19 (8 bits) key color
|
||||
; - bit 20..31 (12 bits) start delay D = clock cycles - 8 between irq and first mono pixel, or 6 for color pixel
|
||||
; Mono, clocks per pixel: minimum 4, maximum 35.
|
||||
; Color, clocks per pixel: minimum 2, maximum 33.
|
||||
|
||||
.program monolayer
|
||||
.origin 0 ; must load at offset 0 (LAYER_OFF)
|
||||
|
||||
.wrap_target
|
||||
public idle:
|
||||
pull block ; [1] idle wait
|
||||
|
||||
public entry:
|
||||
wait 0 irq 4 ; [1] wait for IRQ sync goes 0
|
||||
out x,12 ; [1] get length of delay - 8 (or 6 in color)
|
||||
layer_wait:
|
||||
jmp x--,layer_wait ; [1] delay loop
|
||||
out isr,8 ; [1] get key color
|
||||
out y,11 ; [1] get number of pixels-1
|
||||
out x,1 ; [1] get mode flag
|
||||
jmp !x,layer_color ; [1] 0=use color mode
|
||||
layer_loop:
|
||||
out x,1 ; [1] get one bit
|
||||
jmp !x,layer_out ; [1] bit=0, output pixel
|
||||
jmp layer_skip ; [1] jump to end of loop
|
||||
layer_out:
|
||||
mov pins,isr ; [1] output pixel
|
||||
layer_skip:
|
||||
public extra1:
|
||||
jmp y--,layer_loop [0] ; [1+CPP-4] loop next pixel (set extra wait CPP-4)
|
||||
jmp idle
|
||||
|
||||
layer_color:
|
||||
out pins,8
|
||||
public extra2:
|
||||
jmp y--,layer_color [0] ; [1+CPP-2] loop next pixel (set extra wait CPP-2)
|
||||
; wrap jump to idle
|
||||
.wrap
|
||||
|
||||
; ============================================================================
|
||||
; VGA output - layer with RLE compression (17 instructions)
|
||||
; ============================================================================
|
||||
; Input is left shifted with byte-swap (lower byte comes first)
|
||||
; Requires 3 clock cycles per pixel.
|
||||
; Clocks per pixel: minimum 3, maximum 32.
|
||||
|
||||
.program rlelayer
|
||||
.origin 0 ; must load at offset 0 (LAYER_OFF)
|
||||
|
||||
; [1 instruction] idle wait (tokens: {8} ignored, {8} 'idle' command)
|
||||
public idle:
|
||||
out pc,8 ; [1] idle wait
|
||||
|
||||
; [4 instructions] start
|
||||
public entry:
|
||||
wait 0 irq 4 ; [1] wait for IRQ sync goes 0
|
||||
out x,32 [2] ; [3] get length of delay - 7
|
||||
entry_wait:
|
||||
jmp x--,entry_wait ; [1] delay
|
||||
jmp raw_next ; [1]
|
||||
|
||||
; [1 instruction] skip N+2 (2..257) pixels (tokens: {8} N = number of pixels - 2, {8} 'skip' command)
|
||||
public skip:
|
||||
public extra1:
|
||||
jmp x--,skip [0] ; [1+CPP-1] wait (set extra wait CPP-1)
|
||||
|
||||
; [1 instruction] skip 1 pixel (tokens: {8} ignored, {8} 'skip1' command)
|
||||
public skip1:
|
||||
public extra2:
|
||||
jmp raw_next [0] ; [1+CPP-3] jump (set extra wait CPP-3)
|
||||
|
||||
; [4 instructions] repeat N+3 (3..258) pixels (tokens: {8} pixel to repeat, {8} 'run' command, {8} N = number of pixels - 3)
|
||||
public run:
|
||||
public extra3:
|
||||
mov pins,x [0] ; [1+CPP-2] output pixel (set extra wait CPP-2)
|
||||
out y,8 ; [1] get counter N
|
||||
run_loop:
|
||||
public extra4:
|
||||
mov pins,x [0] ; [1+CPP-2] output pixel (set extra wait CPP-2)
|
||||
jmp y--,run_loop ; [1] next pixel
|
||||
|
||||
; [1 instruction] output 1 RAW pixel (tokens: {8} pixel, {8} 'raw1' command)
|
||||
public raw1:
|
||||
public extra5:
|
||||
mov pins,x [0] ; [1+CPP-3] output pixel (set extra wait CPP-3)
|
||||
.wrap_target
|
||||
raw_next:
|
||||
out x,8 ; [1] get counter N
|
||||
out pc,8 ; [1] jump
|
||||
|
||||
; [5 instructions] output N+2 (2..257) RAW pixels (tokens: {8} N = number of pixels - 2, {8} 'raw' command, {(N+2)*8} pixels)
|
||||
public raw: ; 14:
|
||||
raw_loop:
|
||||
public extra6:
|
||||
out pins,8 [0] ; [1+CPP-2] output pixel (set extra wait CPP-2)
|
||||
jmp x--,raw_loop ; [1] loop next pixel
|
||||
public extra7:
|
||||
out pins,8 [0] ; [1+CPP-3] output pixel (set extra wait CPP-3)
|
||||
; wrap jump to raw_next
|
||||
.wrap
|
||||
|
|
|
|||
|
|
@ -1,362 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_ATEXT
|
||||
//
|
||||
// ****************************************************************************
|
||||
// u32 par SSEGM_PAR pointer to the font
|
||||
// u32 par2 SSEGM_PAR2 pointer to 16 colors of palettes
|
||||
// u16 par3 font height
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// render font pixel mask
|
||||
.extern RenderTextMask // u32 RenderTextMask[512];
|
||||
|
||||
// extern "C" u8* RenderAText(u8* dbuf, int x, int y, int w, sSegm* segm)
|
||||
|
||||
// render 8-pixel attribute text GF_ATEXT
|
||||
// R0 ... destination data buffer
|
||||
// R1 ... start X coordinate (in pixels, must be multiple of 4)
|
||||
// R2 ... start Y coordinate (in graphics lines)
|
||||
// R3 ... width to display (must be multiple of 4 and > 0)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to destination data buffer.
|
||||
// 320 pixels take 11.9 us at 151 MHz.
|
||||
|
||||
.thumb_func
|
||||
.global RenderAText
|
||||
RenderAText:
|
||||
|
||||
// push registers
|
||||
push {r1-r7,lr}
|
||||
mov r4,r8
|
||||
push {r4}
|
||||
|
||||
// Stack content:
|
||||
// SP+0: R8
|
||||
// SP+4: R1 start X coordinate
|
||||
// SP+8: R2 start Y coordinate (later: base pointer to text data row)
|
||||
// SP+12: R3 width to display
|
||||
// SP+16: R4
|
||||
// SP+20: R5
|
||||
// SP+24: R6
|
||||
// SP+28: R7
|
||||
// SP+32: LR
|
||||
// SP+36: video segment (later: wrap width in X direction)
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#36] // load video segment -> R4
|
||||
|
||||
// start divide Y/font height
|
||||
ldr r6,RenderAText_pSioBase // get address of SIO base -> R6
|
||||
str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate
|
||||
ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2
|
||||
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height
|
||||
|
||||
// - now we must wait at least 8 clock cycles to get result of division
|
||||
|
||||
// [6] get wrap width -> [SP+36]
|
||||
ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width
|
||||
movs r7,#3 // [1] mask to align to 32-bit
|
||||
bics r5,r7 // [1] align wrap
|
||||
str r5,[sp,#36] // [2] save wrap width
|
||||
|
||||
// [1] align X coordinate to 32-bit
|
||||
bics r1,r7 // [1]
|
||||
|
||||
// [3] align remaining width
|
||||
bics r3,r7 // [1]
|
||||
str r3,[sp,#12] // [2] save new width
|
||||
|
||||
// load result of division Y/font_height -> R6 Y relative at row, R7 Y row
|
||||
// Note: QUOTIENT must be read last
|
||||
ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row
|
||||
ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row
|
||||
|
||||
// pointer to font line -> R3
|
||||
lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long)
|
||||
ldr r3,[r4,#SSEGM_PAR] // get pointer to font
|
||||
add r3,r5 // line offset + font base -> pointer to current font line R3
|
||||
|
||||
// base pointer to text data (without X) -> [SP+8], R2
|
||||
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
|
||||
muls r2,r5 // Y * WB -> offset of row in text buffer
|
||||
ldr r5,[r4,#SSEGM_DATA] // pointer to data
|
||||
add r2,r5 // base address of text buffer
|
||||
str r2,[sp,#8] // save pointer to text buffer
|
||||
|
||||
// prepare pointer to text data with X -> R2 (1 position is 1 character + 1 attributes)
|
||||
lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width)
|
||||
add r2,r6 // add index
|
||||
add r2,r6 // add index*2, pointer to source text buffer -> R2
|
||||
|
||||
// prepare pointer to palettes -> R8
|
||||
ldr r5,[r4,#SSEGM_PAR2] // get pointer to palette table -> R5
|
||||
mov r8,r5 // save pointer to palette table
|
||||
|
||||
// prepare pointer to conversion table -> LR
|
||||
ldr r5,RenderAText_Addr // get pointer to conversion table -> R5
|
||||
mov lr,r5 // conversion table -> LR
|
||||
|
||||
// ---- render 2nd half of first character
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate
|
||||
// R2 ... pointer to source text buffer
|
||||
// R3 ... pointer to font line
|
||||
// R4 ... background color (expanded to 32-bit)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... foreground color (expanded to 32-bit)
|
||||
// R7 ... (temporary)
|
||||
// R8 ... pointer to palette table
|
||||
// LR ... pointer to conversion table
|
||||
// [SP+8] ... base pointer to text data (without X)
|
||||
// [SP+12] ... remaining width
|
||||
// [SP+36] ... wrap width
|
||||
|
||||
// check bit 2 of X coordinate - check if image starts with 2nd half of first character
|
||||
lsls r6,r1,#29 // check bit 2 of X coordinate
|
||||
bpl 2f // bit 2 not set, starting even 4-pixels
|
||||
|
||||
// [6] load background color -> R4
|
||||
ldrb r6,[r2,#1] // [2] load color attributes -> R6
|
||||
mov r5,r8 // [1] get palette table -> R5
|
||||
lsrs r4,r6,#4 // [1] prepare index of background color
|
||||
ldrb r4,[r5,r4] // [2] load background color
|
||||
|
||||
// [4] load foreground color -> R6
|
||||
lsls r6,#28 // [1] isolate lower 4 bits
|
||||
lsrs r6,#28 // [1] mask lower 4 bits
|
||||
ldrb r6,[r5,r6] // [2] load foreground color
|
||||
|
||||
// [4] expand background color to 32-bit -> R4
|
||||
lsls r5,r4,#8 // [1] shift background color << 8
|
||||
orrs r5,r4 // [1] color expanded to 16 bits
|
||||
lsls r4,r5,#16 // [1] shift 16-bit color << 16
|
||||
orrs r4,r5 // [1] color expanded to 32 bits
|
||||
|
||||
// [4] expand foreground color to 32-bit -> R6
|
||||
lsls r5,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r5,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r5,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r5 // [1] color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [5] load font sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
|
||||
ldrb r5,[r3,r5] // [2] load font sample -> R5
|
||||
adds r2,#2 // [1] shift pointer to source text buffer
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply font sample * 8
|
||||
add r5,lr // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store second 4 pixels
|
||||
|
||||
// shift X coordinate
|
||||
adds r1,#4 // shift X coordinate
|
||||
|
||||
// check end of segment
|
||||
ldr r7,[sp,#36] // load wrap width
|
||||
cmp r1,r7 // end of segment?
|
||||
blo 1f
|
||||
movs r1,#0 // reset X coordinate
|
||||
ldr r2,[sp,#8] // get base pointer to text data -> R2
|
||||
|
||||
// shift remaining width
|
||||
1: ldr r7,[sp,#12] // get remaining width
|
||||
subs r7,#4 // shift width
|
||||
str r7,[sp,#12] // save new width
|
||||
|
||||
// prepare wrap width - start X -> R7
|
||||
2: ldr r7,[sp,#36] // load wrap width
|
||||
subs r7,r1 // pixels remaining to end of segment
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// Outer loop variables (* prepared before outer loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... number of characters to generate in one part of segment
|
||||
// R2 ... *pointer to source text buffer
|
||||
// R3 ... *pointer to font line
|
||||
// R4 ... background color (expanded to 32-bit)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... foreground color (expanded to 32-bit)
|
||||
// R7 ... *wrap width of this segment, later: temporary
|
||||
// R8 ... *pointer to palette table
|
||||
// LR ... *pointer to conversion table
|
||||
// [SP+8] ... *base pointer to text data (without X)
|
||||
// [SP+12] ... *remaining width
|
||||
// [SP+36] ... *wrap width
|
||||
|
||||
RenderAText_OutLoop:
|
||||
|
||||
// limit wrap width by total width -> R7
|
||||
ldr r6,[sp,#12] // get remaining width
|
||||
cmp r7,r6 // compare with wrap width
|
||||
bls 2f // width is OK
|
||||
mov r7,r6 // limit wrap width
|
||||
|
||||
// check whether whole characters remain
|
||||
2: cmp r7,#8 // check number of remaining pixels
|
||||
bhs 5f // enough characters remain
|
||||
|
||||
// check if 1st part of last character remains
|
||||
cmp r7,#4 // check 1st part of last character
|
||||
blo 3f // all done
|
||||
|
||||
// ---- render 1st part of last character
|
||||
|
||||
RenderAText_Last:
|
||||
|
||||
// [6] load background color -> R4
|
||||
ldrb r6,[r2,#1] // [2] load color attributes -> R6
|
||||
mov r5,r8 // [1] get palette table -> R5
|
||||
lsrs r4,r6,#4 // [1] prepare index of background color
|
||||
ldrb r4,[r5,r4] // [2] load background color
|
||||
|
||||
// [4] load foreground color -> R6
|
||||
lsls r6,#28 // [1] isolate lower 4 bits
|
||||
lsrs r6,#28 // [1] mask lower 4 bits
|
||||
ldrb r6,[r5,r6] // [2] load foreground color
|
||||
|
||||
// [4] expand background color to 32-bit -> R4
|
||||
lsls r5,r4,#8 // [1] shift background color << 8
|
||||
orrs r5,r4 // [1] color expanded to 16 bits
|
||||
lsls r4,r5,#16 // [1] shift 16-bit color << 16
|
||||
orrs r4,r5 // [1] color expanded to 32 bits
|
||||
|
||||
// [4] expand foreground color to 32-bit -> R6
|
||||
lsls r5,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r5,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r5,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r5 // [1] color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [5] load font sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
|
||||
ldrb r5,[r3,r5] // [2] load font sample -> R5
|
||||
adds r2,#2 // [1] shift pointer to source text buffer
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply font sample * 8
|
||||
add r5,lr // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert first 4 pixels (higher 4 bits)
|
||||
ldr r1,[r5,#0] // [2] load mask for higher 4 bits
|
||||
ands r1,r6 // [1] mask foreground color
|
||||
eors r1,r4 // [1] combine with background color
|
||||
stmia r0!,{r1} // [2] store first 4 pixels
|
||||
|
||||
// check if continue with next segment
|
||||
ldr r2,[sp,#8] // get base pointer to text data -> R2
|
||||
cmp r7,#4
|
||||
bhi RenderAText_OutLoop
|
||||
|
||||
// pop registers and return
|
||||
3: pop {r4}
|
||||
mov r8,r4
|
||||
pop {r1-r7,pc}
|
||||
|
||||
// ---- prepare to render whole characters
|
||||
|
||||
// prepare number of whole characters to render -> R1
|
||||
5: lsrs r1,r7,#2 // shift to get number of characters*2
|
||||
lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5
|
||||
subs r6,r5 // get remaining width
|
||||
str r6,[sp,#12] // save new remaining width
|
||||
subs r1,#1 // number of characters*2 - 1
|
||||
|
||||
// ---- [41*N-1] start inner loop, render characters in one part of segment
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *number of characters to generate*2 - 1 (loop counter)
|
||||
// R2 ... *pointer to source text buffer
|
||||
// R3 ... *pointer to font line
|
||||
// R4 ... background color (expanded to 32-bit)
|
||||
// R5 ... font sample
|
||||
// R6 ... foreground color (expanded to 32-bit)
|
||||
// R7 ... (temporary)
|
||||
// R8 ... *pointer to palette table
|
||||
// LR ... *pointer to conversion table
|
||||
|
||||
RenderAText_InLoop:
|
||||
|
||||
// [6] load background color -> R4
|
||||
ldrb r6,[r2,#1] // [2] load color attributes -> R6
|
||||
mov r5,r8 // [1] get palette table -> R5
|
||||
lsrs r4,r6,#4 // [1] prepare index of background color
|
||||
ldrb r4,[r5,r4] // [2] load background color
|
||||
|
||||
// [4] load foreground color -> R6
|
||||
lsls r6,#28 // [1] isolate lower 4 bits
|
||||
lsrs r6,#28 // [1] mask lower 4 bits
|
||||
ldrb r6,[r5,r6] // [2] load foreground color
|
||||
|
||||
// [4] expand background color to 32-bit -> R4
|
||||
lsls r5,r4,#8 // [1] shift background color << 8
|
||||
orrs r5,r4 // [1] color expanded to 16 bits
|
||||
lsls r4,r5,#16 // [1] shift 16-bit color << 16
|
||||
orrs r4,r5 // [1] color expanded to 32 bits
|
||||
|
||||
// [4] expand foreground color to 32-bit -> R6
|
||||
lsls r5,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r5,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r5,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r5 // [1] color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [5] load font sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
|
||||
ldrb r5,[r3,r5] // [2] load font sample -> R5
|
||||
adds r2,#2 // [1] shift pointer to source text buffer
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply font sample * 8
|
||||
add r5,lr // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert first 4 pixels (higher 4 bits)
|
||||
ldr r7,[r5,#0] // [2] load mask for higher 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store first 4 pixels
|
||||
|
||||
// [6] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store second 4 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r1,#2 // [1] shift loop counter
|
||||
bhi RenderAText_InLoop // [1,2] > 0, render next whole character
|
||||
|
||||
// ---- end inner loop, continue with last character, or start new part
|
||||
|
||||
// continue to outer loop
|
||||
ldr r7,[sp,#36] // load wrap width
|
||||
beq RenderAText_Last // render 1st half of last character
|
||||
ldr r2,[sp,#8] // get base pointer to text data -> R2
|
||||
b RenderAText_OutLoop // go back to outer loop
|
||||
|
||||
.align 2
|
||||
RenderAText_Addr:
|
||||
.word RenderTextMask
|
||||
RenderAText_pSioBase:
|
||||
.word SIO_BASE // address of SIO base
|
||||
|
|
@ -1,346 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_ATTRIB8
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// render font pixel mask
|
||||
.extern RenderTextMask // u32 RenderTextMask[512];
|
||||
|
||||
// extern "C" u8* RenderAttrib8(u8* dbuf, int x, int y, int w, sSegm* segm)
|
||||
|
||||
// render 8-pixel attribute text GF_ATTRIB8
|
||||
// R0 ... destination data buffer
|
||||
// R1 ... start X coordinate (in pixels, must be multiple of 4)
|
||||
// R2 ... start Y coordinate (in graphics lines)
|
||||
// R3 ... width to display (must be multiple of 4 and > 0)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to destination data buffer.
|
||||
// 320 pixels take 11 us at 151 MHz.
|
||||
|
||||
.thumb_func
|
||||
.global RenderAttrib8
|
||||
RenderAttrib8:
|
||||
|
||||
// push registers
|
||||
push {r2-r7,lr}
|
||||
mov r4,r8
|
||||
push {r4}
|
||||
|
||||
// Input variables and stack content:
|
||||
// R1 ... start X coordinate
|
||||
// SP+0: R8
|
||||
// SP+4: R2 start Y coordinate (later: base pointer to pixel data row)
|
||||
// SP+8: R3 width to display
|
||||
// SP+12: R4
|
||||
// SP+16: R5
|
||||
// SP+20: R6
|
||||
// SP+24: R7
|
||||
// SP+28: LR
|
||||
// SP+32: video segment (later: wrap width in X direction)
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#32] // load video segment -> R4
|
||||
|
||||
// get wrap width -> [SP+32]
|
||||
ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width
|
||||
movs r7,#3 // mask to align to 32-bit
|
||||
bics r5,r7 // align wrap
|
||||
str r5,[sp,#32] // save wrap width
|
||||
|
||||
// align X coordinate to 32-bit -> R1
|
||||
bics r1,r7
|
||||
|
||||
// align remaining width -> [SP+8]
|
||||
bics r3,r7 // width
|
||||
str r3,[sp,#8] // save new width
|
||||
|
||||
// base pointer to attributes (without X) -> R3
|
||||
lsrs r3,r2,#3 // delete low 3 bits of Y coordinate -> row index
|
||||
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
|
||||
muls r3,r5 // Y * WB -> offset of row in text buffer
|
||||
ldr r7,[r4,#SSEGM_PAR] // pointer to attributes
|
||||
add r3,r7 // base address of attributes -> R3
|
||||
|
||||
// base pointer to pixel data (without X) -> [SP+4], R2
|
||||
muls r2,r5 // Y * WB -> offset of row in text buffer
|
||||
ldr r5,[r4,#SSEGM_DATA] // pointer to data
|
||||
add r2,r5 // base address of text buffer
|
||||
str r2,[sp,#4] // save pointer to text buffer
|
||||
|
||||
// offset of attributes -> R3
|
||||
subs r3,r2 // offset of attributes, relative to source text buffer
|
||||
|
||||
// prepare pointer to pixel data with X -> R2 (1 position is 1 byte holding 8 pixels)
|
||||
lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width)
|
||||
add r2,r6 // add index, pointer to source text buffer -> R2
|
||||
|
||||
// prepare pointer to palettes -> R8
|
||||
ldr r5,[r4,#SSEGM_PAR2] // get pointer to palette table -> R5
|
||||
mov r8,r5 // save pointer to palette table
|
||||
|
||||
// prepare pointer to conversion table -> LR
|
||||
ldr r5,RenderAttrib8_Addr // get pointer to conversion table -> R5
|
||||
mov lr,r5 // conversion table -> LR
|
||||
|
||||
// ---- render 2nd half of first character
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate
|
||||
// R2 ... pointer to source text buffer
|
||||
// R3 ... offset of attributes (relative to source text buffer)
|
||||
// R4 ... background color (expanded to 32-bit)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... foreground color (expanded to 32-bit)
|
||||
// R7 ... (temporary)
|
||||
// R8 ... pointer to palette table
|
||||
// LR ... pointer to conversion table
|
||||
// [SP+4] ... base pointer to pixel data (without X)
|
||||
// [SP+8] ... remaining width
|
||||
// [SP+32] ... wrap width
|
||||
|
||||
// check bit 2 of X coordinate - check if image starts with 2nd half of first character
|
||||
lsls r6,r1,#29 // check bit 2 of X coordinate
|
||||
bpl 2f // bit 2 not set, starting even 4-pixels
|
||||
|
||||
// [6] load background color -> R4
|
||||
ldrb r6,[r2,r3] // [2] load color attributes -> R6
|
||||
mov r5,r8 // [1] get palette table -> R5
|
||||
lsrs r4,r6,#4 // [1] prepare index of background color
|
||||
ldrb r4,[r5,r4] // [2] load background color -> R4
|
||||
|
||||
// [4] load foreground color -> R6
|
||||
lsls r6,#28 // [1] isolate lower 4 bits
|
||||
lsrs r6,#28 // [1] mask lower 4 bits
|
||||
ldrb r6,[r5,r6] // [2] load foreground color -> R6
|
||||
|
||||
// [4] expand background color to 32-bit -> R4
|
||||
lsls r5,r4,#8 // [1] shift background color << 8
|
||||
orrs r5,r4 // [1] color expanded to 16 bits
|
||||
lsls r4,r5,#16 // [1] shift 16-bit color << 16
|
||||
orrs r4,r5 // [1] color expanded to 32 bits
|
||||
|
||||
// [4] expand foreground color to 32-bit -> R6
|
||||
lsls r5,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r5,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r5,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r5 // [1] color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [3] load pixel sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load pixels from source buffer -> R5
|
||||
adds r2,#1 // [1] shift pointer to source buffer
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply font sample * 8
|
||||
add r5,lr // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store second 4 pixels
|
||||
|
||||
// shift X coordinate
|
||||
adds r1,#4 // shift X coordinate
|
||||
|
||||
// check end of segment
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
cmp r1,r7 // end of segment?
|
||||
blo 1f
|
||||
movs r1,#0 // reset X coordinate
|
||||
ldr r2,[sp,#4] // get base pointer to pixel data -> R2
|
||||
|
||||
// shift remaining width
|
||||
1: ldr r7,[sp,#8] // get remaining width
|
||||
subs r7,#4 // shift width
|
||||
str r7,[sp,#8] // save new width
|
||||
|
||||
// prepare wrap width - start X -> R7
|
||||
2: ldr r7,[sp,#32] // load wrap width
|
||||
subs r7,r1 // pixels remaining to end of segment
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// Outer loop variables (* prepared before outer loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... number of characters to generate in one part of segment
|
||||
// R2 ... *pointer to source text buffer
|
||||
// R3 ... *offset of attributes (relative to source text buffer)
|
||||
// R4 ... background color (expanded to 32-bit)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... foreground color (expanded to 32-bit)
|
||||
// R7 ... *wrap width of this segment, later: temporary
|
||||
// R8 ... *pointer to palette table
|
||||
// LR ... *pointer to conversion table
|
||||
// [SP+4] ... *base pointer to pixel data (without X)
|
||||
// [SP+8] ... *remaining width
|
||||
// [SP+32] ... *wrap width
|
||||
|
||||
RenderAttrib8_OutLoop:
|
||||
|
||||
// limit wrap width by total width -> R7
|
||||
ldr r6,[sp,#8] // get remaining width
|
||||
cmp r7,r6 // compare with wrap width
|
||||
bls 2f // width is OK
|
||||
mov r7,r6 // limit wrap width
|
||||
|
||||
// check whether whole characters remain
|
||||
2: cmp r7,#8 // check number of remaining pixels
|
||||
bhs 5f // enough characters remain
|
||||
|
||||
// check if 1st part of last character remains
|
||||
cmp r7,#4 // check 1st part of last character
|
||||
blo 3f // all done
|
||||
|
||||
// ---- render 1st part of last character
|
||||
|
||||
RenderAttrib8_Last:
|
||||
|
||||
// [6] load background color -> R4
|
||||
ldrb r6,[r2,r3] // [2] load color attributes -> R6
|
||||
mov r5,r8 // [1] get palette table -> R5
|
||||
lsrs r4,r6,#4 // [1] prepare index of background color
|
||||
ldrb r4,[r5,r4] // [2] load background color -> R4
|
||||
|
||||
// [4] load foreground color -> R6
|
||||
lsls r6,#28 // [1] isolate lower 4 bits
|
||||
lsrs r6,#28 // [1] mask lower 4 bits
|
||||
ldrb r6,[r5,r6] // [2] load foreground color -> R6
|
||||
|
||||
// [4] expand background color to 32-bit -> R4
|
||||
lsls r5,r4,#8 // [1] shift background color << 8
|
||||
orrs r5,r4 // [1] color expanded to 16 bits
|
||||
lsls r4,r5,#16 // [1] shift 16-bit color << 16
|
||||
orrs r4,r5 // [1] color expanded to 32 bits
|
||||
|
||||
// [4] expand foreground color to 32-bit -> R6
|
||||
lsls r5,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r5,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r5,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r5 // [1] color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [3] load pixel sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load pixels from source buffer -> R5
|
||||
adds r2,#1 // [1] shift pointer to source buffer
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply font sample * 8
|
||||
add r5,lr // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert first 4 pixels (higher 4 bits)
|
||||
ldr r1,[r5,#0] // [2] load mask for higher 4 bits
|
||||
ands r1,r6 // [1] mask foreground color
|
||||
eors r1,r4 // [1] combine with background color
|
||||
stmia r0!,{r1} // [2] store first 4 pixels
|
||||
|
||||
// check if continue with next segment
|
||||
ldr r2,[sp,#4] // get base pointer to pixel data -> R2
|
||||
cmp r7,#4
|
||||
bhi RenderAttrib8_OutLoop
|
||||
|
||||
// pop registers and return
|
||||
3: pop {r4}
|
||||
mov r8,r4
|
||||
pop {r2-r7,pc}
|
||||
|
||||
// ---- prepare to render whole characters
|
||||
|
||||
// prepare number of whole characters to render -> R1
|
||||
5: lsrs r1,r7,#2 // shift to get number of characters*2
|
||||
lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5
|
||||
subs r6,r5 // get remaining width
|
||||
str r6,[sp,#8] // save new remaining width
|
||||
subs r1,#1 // number of characters*2 - 1
|
||||
|
||||
// ---- [38*N-1] start inner loop, render characters in one part of segment
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *number of characters to generate*2 - 1 (loop counter)
|
||||
// R2 ... *pointer to source text buffer
|
||||
// R3 ... *offset of attributes (relative to source text buffer)
|
||||
// R4 ... background color (expanded to 32-bit)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... foreground color (expanded to 32-bit)
|
||||
// R7 ... (temporary)
|
||||
// R8 ... *pointer to palette table
|
||||
// LR ... *pointer to conversion table
|
||||
// [SP+4] ... *base pointer to pixel data (without X)
|
||||
// [SP+8] ... *remaining width
|
||||
// [SP+32] ... *wrap width
|
||||
|
||||
RenderAttrib8_InLoop:
|
||||
|
||||
// [6] load background color -> R4
|
||||
ldrb r6,[r2,r3] // [2] load color attributes -> R6
|
||||
mov r5,r8 // [1] get palette table -> R5
|
||||
lsrs r4,r6,#4 // [1] prepare index of background color
|
||||
ldrb r4,[r5,r4] // [2] load background color -> R4
|
||||
|
||||
// [4] load foreground color -> R6
|
||||
lsls r6,#28 // [1] isolate lower 4 bits
|
||||
lsrs r6,#28 // [1] mask lower 4 bits
|
||||
ldrb r6,[r5,r6] // [2] load foreground color -> R6
|
||||
|
||||
// [4] expand background color to 32-bit -> R4
|
||||
lsls r5,r4,#8 // [1] shift background color << 8
|
||||
orrs r5,r4 // [1] color expanded to 16 bits
|
||||
lsls r4,r5,#16 // [1] shift 16-bit color << 16
|
||||
orrs r4,r5 // [1] color expanded to 32 bits
|
||||
|
||||
// [4] expand foreground color to 32-bit -> R6
|
||||
lsls r5,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r5,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r5,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r5 // [1] color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [3] load pixel sample -> R7
|
||||
ldrb r7,[r2,#0] // [2] load pixels from source buffer -> R7
|
||||
adds r2,#1 // [1] shift pointer to source buffer
|
||||
|
||||
// [2] prepare conversion table -> R7
|
||||
lsls r7,#3 // [1] multiply sample * 8
|
||||
add r7,lr // [1] add pointer to conversion table
|
||||
|
||||
// [4] convert first 4 pixels (higher 4 bits)
|
||||
ldr r5,[r7,#0] // [2] load mask for higher 4 bits
|
||||
ands r5,r6 // [1] mask foreground color
|
||||
eors r5,r4 // [1] combine with background color
|
||||
|
||||
// [4] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r7,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
|
||||
// [3] write pixels
|
||||
stmia r0!,{r5,r7} // [3] store 8 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r1,#2 // [1] shift loop counter
|
||||
bhi RenderAttrib8_InLoop // [1,2] > 0, render next whole character
|
||||
|
||||
// ---- end inner loop, continue with last character, or start new part
|
||||
|
||||
// continue to outer loop
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
beq RenderAttrib8_Last // render 1st half of last character
|
||||
ldr r2,[sp,#4] // get base pointer to pixel data -> R2
|
||||
b RenderAttrib8_OutLoop // go back to outer loop
|
||||
|
||||
.align 2
|
||||
RenderAttrib8_Addr:
|
||||
.word RenderTextMask
|
||||
|
|
@ -1,89 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_COLOR
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" u8* RenderColor(u8* dbuf, u32 color, int w);
|
||||
|
||||
// render color GF_COLOR
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... color pattern 4-pixels
|
||||
// R2 ... width of this segment as a multiple of 4 pixels (= width in pixels / 4)
|
||||
// Output new pointer to destination data buffer.
|
||||
// 320 pixels takes 1.1 us on 151 MHz.
|
||||
// - using only small transfer (24 pixels per loop) takes 1.22 us
|
||||
// - using only single transfer (4 pixels per loop) takes 2.91 us
|
||||
// - memset takes 1.42 us
|
||||
|
||||
.thumb_func
|
||||
.global RenderColor
|
||||
RenderColor:
// RenderColor has no instructions of its own: its label is directly followed
// by the MemSet4 label, so both global entry points execute the same fill code.
|
||||
|
||||
// fill memory buffer with u32 words
|
||||
// buf ... data buffer, must be 32-bit aligned
|
||||
// data ... data word to store
|
||||
// num ... number of 32-bit words (= number of bytes/4)
|
||||
// Returns new destination address.
|
||||
// extern "C" u32* MemSet4(u32* buf, u32 data, int num);
// Register use: R0 = buf (advanced by every store, returned to the caller),
// R1 = data, R2 = num (remaining word count), R3..R7 = copies of R1.
// The fill runs in three stages (30 words, 6 words, 1 word per pass). Each
// stage is entered at its counter check, so a stage stores nothing when fewer
// words than its pass size remain; num = 0 stores nothing at all.
|
||||
|
||||
.thumb_func
|
||||
.global MemSet4
|
||||
MemSet4:
|
||||
|
||||
// push registers (R4..R7 are overwritten with the pattern below)
|
||||
push {r4,r5,r6,r7,lr}
|
||||
|
||||
// duplicate color pattern into R3..R7, so one STMIA can store 6 equal words
|
||||
mov r3,r1
|
||||
mov r4,r1
|
||||
mov r5,r1
|
||||
mov r6,r1
|
||||
mov r7,r1
|
||||
|
||||
// go to big transfer (enter the loop at its counter check)
|
||||
b 3f
|
||||
|
||||
// ---- [38 per loop] big transfer 120 pixels, speed 0.317 clk per pixel
|
||||
|
||||
// [38] store 30 words (=120 pixels)
|
||||
2: stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
|
||||
stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
|
||||
stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
|
||||
stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
|
||||
stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
|
||||
3: subs r2,#30 // [1] decrement number of words
|
||||
bge 2b // [1,2] loop next 30 words (at least 30 words were remaining)
|
||||
adds r2,#30 // [1] restore (undo the last subtraction, R2 = words still to store)
|
||||
|
||||
// go to small transfer (enter the loop at its counter check)
|
||||
b 6f
|
||||
|
||||
// ---- [10 per loop] small transfer 24 pixels, speed 0.417 clk per pixel
|
||||
|
||||
// [9,10] store 6 words (=24 pixels), including loop counter and branch
|
||||
4: stmia r0!,{r1,r3,r4,r5,r6,r7} // [7] 6 words, 24 pixels
|
||||
6: subs r2,#6 // [1] decrement number of words
|
||||
bge 4b // [1,2] loop next 6 words (at least 6 words were remaining)
|
||||
adds r2,#6 // [1] restore (undo the last subtraction, R2 = words still to store)
|
||||
|
||||
// go to single transfer (enter the loop at its counter check)
|
||||
b 8f
|
||||
|
||||
// ---- [5 per loop] single transfer 4 pixels, speed 1.25 clk per pixel
|
||||
|
||||
// [4,5] store 1 word (=4 pixels)
|
||||
7: stmia r0!,{r1} // [2] 1 word, 4 pixels
|
||||
8: subs r2,#1 // [1] loop counter
|
||||
bge 7b // [1,2] next word (at least 1 word was remaining)
|
||||
|
||||
// pop registers and return; R0 holds the address just past the last stored word
|
||||
pop {r4,r5,r6,r7,pc}
|
||||
|
|
@ -1,335 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_CTEXT
|
||||
//
|
||||
// ****************************************************************************
|
||||
// u32 par SSEGM_PAR pointer to the font
|
||||
// u16 par3 font height
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// render font pixel mask
|
||||
.extern RenderTextMask // u32 RenderTextMask[512];
|
||||
|
||||
// extern "C" u8* RenderCText(u8* dbuf, int x, int y, int w, sSegm* segm)
|
||||
|
||||
// render 8-pixel color text GF_CTEXT
|
||||
// R0 ... destination data buffer
|
||||
// R1 ... start X coordinate (in pixels, must be multiple of 4)
|
||||
// R2 ... start Y coordinate (in graphics lines)
|
||||
// R3 ... width to display (must be multiple of 4 and > 0)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to destination data buffer.
|
||||
// 320 pixels takes 10.4 us on 151 MHz.
|
||||
|
||||
.thumb_func
|
||||
.global RenderCText
|
||||
RenderCText:
// Renders one graphics line of 8-pixel wide color text into the destination
// buffer, 4 pixels (one 32-bit word) at a time.
// Source text buffer: 3 bytes per character position, read below as
// [0] character code, [1] background color, [2] foreground color.
// Each font sample indexes RenderTextMask with a stride of 8 bytes: word 0 is
// the mask for the higher 4 bits, word 1 the mask for the lower 4 bits.
// Output word = (mask AND (foreground XOR background)) XOR background.
// R0 is advanced by every store and is not restored, so it is the return value.
|
||||
|
||||
// push registers
|
||||
push {r1-r7,lr}
|
||||
|
||||
// Stack content:
|
||||
// SP+0: R1 start X coordinate
|
||||
// SP+4: R2 start Y coordinate (later: base pointer to text data row)
|
||||
// SP+8: R3 width to display
|
||||
// SP+12: R4
|
||||
// SP+16: R5
|
||||
// SP+20: R6
|
||||
// SP+24: R7
|
||||
// SP+28: LR
|
||||
// SP+32: video segment (later: wrap width in X direction)
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#32] // load video segment -> R4
|
||||
|
||||
// start divide Y/font height (hardware divider, result is collected later)
|
||||
ldr r6,RenderCText_pSioBase // get address of SIO base -> R6
|
||||
str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate
|
||||
ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2
|
||||
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height
|
||||
|
||||
// - now we must wait at least 8 clock cycles to get result of division
// (the [6]+[1]+[3] annotated cycles below fill this delay with useful work)
|
||||
|
||||
// [6] get wrap width -> [SP+32]
|
||||
ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width
|
||||
movs r7,#3 // [1] mask to align to 32-bit
|
||||
bics r5,r7 // [1] align wrap
|
||||
str r5,[sp,#32] // [2] save wrap width (overwrites the segment pointer argument on the stack)
|
||||
|
||||
// [1] align X coordinate to 32-bit
|
||||
bics r1,r7 // [1]
|
||||
|
||||
// [3] align remaining width
|
||||
bics r3,r7 // [1]
|
||||
str r3,[sp,#8] // [2] save new width
|
||||
|
||||
// load result of division Y/font_height -> R5 Y relative to row, R2 index of row
|
||||
// Note: QUOTIENT must be read last
|
||||
ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row
|
||||
ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row
|
||||
|
||||
// pointer to font line -> R3
|
||||
lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long)
|
||||
ldr r3,[r4,#SSEGM_PAR] // get pointer to font
|
||||
add r3,r5 // line offset + font base -> pointer to current font line R3
|
||||
|
||||
// base pointer to text data (without X) -> [SP+4], R2
|
||||
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
|
||||
muls r2,r5 // Y * WB -> offset of row in text buffer
|
||||
ldr r5,[r4,#SSEGM_DATA] // pointer to data
|
||||
add r2,r5 // base address of text buffer
|
||||
str r2,[sp,#4] // save pointer to text buffer
|
||||
|
||||
// prepare pointer to text data with X -> R2 (1 position is 1 character + 1 background + 1 foreground)
|
||||
lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width)
|
||||
add r2,r6 // add index
|
||||
add r2,r6 // add index*2
|
||||
add r2,r6 // add index*3, pointer to source text buffer -> R2
|
||||
|
||||
// prepare pointer to conversion table -> LR
|
||||
ldr r5,RenderCText_Addr // get pointer to conversion table -> R5
|
||||
mov lr,r5 // conversion table -> LR
|
||||
|
||||
// ---- render 2nd half of first character
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate
|
||||
// R2 ... pointer to source text buffer
|
||||
// R3 ... pointer to font line
|
||||
// R4 ... background color (expanded to 32-bit)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... foreground color (expanded to 32-bit)
|
||||
// R7 ... (temporary)
|
||||
// LR ... pointer to conversion table
|
||||
// [SP+4] ... base pointer to text data (without X)
|
||||
// [SP+8] ... remaining width
|
||||
// [SP+32] ... wrap width
|
||||
|
||||
// check bit 2 of X coordinate - check if image starts with 2nd half of first character
|
||||
lsls r6,r1,#29 // check bit 2 of X coordinate (shifted into the sign bit)
|
||||
bpl 2f // bit 2 not set, starting even 4-pixels
|
||||
|
||||
// [4] load font sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
|
||||
ldrb r5,[r3,r5] // [2] load font sample -> R5
|
||||
|
||||
// [2] load background color -> R4
|
||||
ldrb r4,[r2,#1] // [2] load background color from source text buffer
|
||||
|
||||
// [4] expand background color to 32-bit -> R4
|
||||
lsls r7,r4,#8 // [1] shift background color << 8
|
||||
orrs r7,r4 // [1] color expanded to 16 bits
|
||||
lsls r4,r7,#16 // [1] shift 16-bit color << 16
|
||||
orrs r4,r7 // [1] color expanded to 32 bits
|
||||
|
||||
// [3] load foreground color -> R6
|
||||
ldrb r6,[r2,#2] // [2] load foreground color from source text buffer -> R6
|
||||
adds r2,#3 // [1] shift pointer to source text buffer
|
||||
|
||||
// [4] expand foreground color to 32-bit -> R6
|
||||
lsls r7,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r7,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r7,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r7 // [1] color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply font sample * 8
|
||||
add r5,lr // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store second 4 pixels
|
||||
|
||||
// shift X coordinate
|
||||
adds r1,#4 // shift X coordinate
|
||||
|
||||
// check end of segment
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
cmp r1,r7 // end of segment?
|
||||
blo 1f
|
||||
movs r1,#0 // reset X coordinate
|
||||
ldr r2,[sp,#4] // get base pointer to text data -> R2
|
||||
|
||||
// shift remaining width (the 4 pixels stored above are done)
|
||||
1: ldr r7,[sp,#8] // get remaining width
|
||||
subs r7,#4 // shift width
|
||||
str r7,[sp,#8] // save new width
|
||||
|
||||
// prepare wrap width - start X -> R7
|
||||
2: ldr r7,[sp,#32] // load wrap width
|
||||
subs r7,r1 // pixels remaining to end of segment
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// Outer loop variables (* prepared before outer loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... number of characters to generate in one part of segment
|
||||
// R2 ... *pointer to source text buffer
|
||||
// R3 ... *pointer to font line
|
||||
// R4 ... background color (expanded to 32-bit)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... foreground color (expanded to 32-bit)
|
||||
// R7 ... *wrap width of this segment, later: temporary
|
||||
// LR ... *pointer to conversion table
|
||||
// [SP+4] ... *base pointer to text data (without X)
|
||||
// [SP+8] ... *remaining width
|
||||
// [SP+32] ... *wrap width
|
||||
|
||||
RenderCText_OutLoop:
|
||||
|
||||
// limit wrap width by total width -> R7
|
||||
ldr r6,[sp,#8] // get remaining width
|
||||
cmp r7,r6 // compare with wrap width
|
||||
bls 2f // width is OK
|
||||
mov r7,r6 // limit wrap width
|
||||
|
||||
// check if remain whole characters
|
||||
2: cmp r7,#8 // check number of remaining pixels
|
||||
bhs 5f // enough characters remain
|
||||
|
||||
// check if 1st part of last character remains
|
||||
cmp r7,#4 // check 1st part of last character
|
||||
blo 3f // all done
|
||||
|
||||
// ---- render 1st part of last character
// (R1 is used as a temporary in this section; R7 must stay intact for the check at the end)
|
||||
|
||||
RenderCText_Last:
|
||||
|
||||
// [4] load font sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
|
||||
ldrb r5,[r3,r5] // [2] load font sample -> R5
|
||||
|
||||
// [2] load background color -> R4
|
||||
ldrb r4,[r2,#1] // [2] load background color from source text buffer
|
||||
|
||||
// [4] expand background color to 32-bit -> R4
|
||||
lsls r1,r4,#8 // [1] shift background color << 8
|
||||
orrs r1,r4 // [1] color expanded to 16 bits
|
||||
lsls r4,r1,#16 // [1] shift 16-bit color << 16
|
||||
orrs r4,r1 // [1] color expanded to 32 bits
|
||||
|
||||
// [3] load foreground color -> R6
|
||||
ldrb r6,[r2,#2] // [2] load foreground color from source text buffer -> R6
|
||||
adds r2,#3 // [1] shift pointer to source text buffer
|
||||
|
||||
// [4] expand foreground color to 32-bit
|
||||
lsls r1,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r1,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r1,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r1 // [1] color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply font sample * 8
|
||||
add r5,lr // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert first 4 pixels (higher 4 bits)
|
||||
ldr r1,[r5,#0] // [2] load mask for higher 4 bits
|
||||
ands r1,r6 // [1] mask foreground color
|
||||
eors r1,r4 // [1] combine with background color
|
||||
stmia r0!,{r1} // [2] store first 4 pixels
|
||||
|
||||
// check if continue with next segment
// R7 is the limited part width when this section was reached by falling
// through from the outer loop, and the full wrap width (reloaded from [SP+32])
// when it was reached from the end of the inner loop.
|
||||
ldr r2,[sp,#4] // get base pointer to text data -> R2
|
||||
cmp r7,#4
|
||||
bhi RenderCText_OutLoop
|
||||
|
||||
// pop registers and return (R0 = new pointer to destination data buffer)
|
||||
3: pop {r1-r7,pc}
|
||||
|
||||
// ---- prepare to render whole characters
|
||||
|
||||
// prepare number of whole characters to render -> R1
// R6 still holds the remaining width loaded at RenderCText_OutLoop
|
||||
5: lsrs r1,r7,#2 // shift to get number of characters*2
|
||||
lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5
|
||||
subs r6,r5 // get remaining width
|
||||
str r6,[sp,#8] // save new remaining width
|
||||
subs r1,#1 // number of characters*2 - 1
|
||||
|
||||
// ---- [35*N-1] start inner loop, render characters in one part of segment
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *number of characters to generate*2 - 1 (loop counter)
|
||||
// R2 ... *pointer to source text buffer
|
||||
// R3 ... *pointer to font line
|
||||
// R4 ... background color (expanded to 32-bit)
|
||||
// R5 ... font sample
|
||||
// R6 ... foreground color (expanded to 32-bit)
|
||||
// R7 ... (temporary)
|
||||
// LR ... *pointer to conversion table
|
||||
|
||||
RenderCText_InLoop:
|
||||
|
||||
// [4] load font sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
|
||||
ldrb r5,[r3,r5] // [2] load font sample -> R5
|
||||
|
||||
// [2] load background color -> R4
|
||||
ldrb r4,[r2,#1] // [2] load background color from source text buffer
|
||||
|
||||
// [4] expand background color to 32-bit -> R4
|
||||
lsls r7,r4,#8 // [1] shift background color << 8
|
||||
orrs r7,r4 // [1] color expanded to 16 bits
|
||||
lsls r4,r7,#16 // [1] shift 16-bit color << 16
|
||||
orrs r4,r7 // [1] color expanded to 32 bits
|
||||
|
||||
// [3] load foreground color -> R6
|
||||
ldrb r6,[r2,#2] // [2] load foreground color from source text buffer -> R6
|
||||
adds r2,#3 // [1] shift pointer to source text buffer
|
||||
|
||||
// [4] expand foreground color to 32-bit
|
||||
lsls r7,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r7,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r7,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r7 // [1] color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply font sample * 8
|
||||
add r5,lr // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert first 4 pixels (higher 4 bits)
|
||||
ldr r7,[r5,#0] // [2] load mask for higher 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store first 4 pixels
|
||||
|
||||
// [6] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store second 4 pixels
|
||||
|
||||
// [2,3] loop counter
// The counter started as (number of 4-pixel halves - 1) and drops by 2 per
// character: it ends at exactly 0 when one half character is left over, and
// wraps below 0 when the part consisted of whole characters only.
|
||||
subs r1,#2 // [1] shift loop counter
|
||||
bhi RenderCText_InLoop // [1,2] > 0, render next whole character
|
||||
|
||||
// ---- end inner loop, continue with last character, or start new part
|
||||
|
||||
// continue to outer loop
// (the LDR below leaves the flags of the SUBS above untouched, so BEQ still tests the loop counter)
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
beq RenderCText_Last // render 1st half of last character
|
||||
ldr r2,[sp,#4] // get base pointer to text data -> R2
|
||||
b RenderCText_OutLoop // go back to outer loop
|
||||
|
||||
.align 2
|
||||
RenderCText_Addr:
|
||||
.word RenderTextMask
|
||||
RenderCText_pSioBase:
|
||||
.word SIO_BASE // address of SIO base
|
||||
|
|
@ -1,436 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_DTEXT
|
||||
//
|
||||
// ****************************************************************************
|
||||
// u32 par SSEGM_PAR pointer to the font
|
||||
// u32 par2 SSEGM_PAR2 pointer to font gradient
|
||||
// u16 par3 LOW background color, HIGH font height
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// render font pixel mask
|
||||
.extern RenderTextMask // u32 RenderTextMask[512];
|
||||
|
||||
// extern "C" u8* RenderDText(u8* dbuf, int x, int y, int w, sSegm* segm)
|
||||
|
||||
// render 8-pixel double gradient color text GF_DTEXT
|
||||
// R0 ... destination data buffer
|
||||
// R1 ... start X coordinate (in pixels, must be multiple of 4)
|
||||
// R2 ... start Y coordinate (in graphics lines)
|
||||
// R3 ... width to display (must be multiple of 4 and > 0)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to destination data buffer.
|
||||
// 320 pixels takes us on 151 MHz.
|
||||
|
||||
.thumb_func
|
||||
.global RenderDText
|
||||
RenderDText:
|
||||
|
||||
// Stack content:
|
||||
// SP+0: R8
|
||||
// SP+4: R1 start X coordinate (later: base pointer to gradient array)
|
||||
// SP+8: R2 start Y coordinate (later: base pointer to text data row)
|
||||
// SP+12: R3 width to display
|
||||
// SP+16: R4
|
||||
// SP+20: R5
|
||||
// SP+24: R6
|
||||
// SP+28: R7
|
||||
// SP+32: LR
|
||||
// SP+36: video segment (later: wrap width in X direction)
|
||||
|
||||
// push registers
|
||||
push {r1-r7,lr}
|
||||
mov r4,r8
|
||||
push {r4}
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#36] // load video segment -> R4
|
||||
|
||||
// start divide Y/font height
|
||||
ldr r6,RenderDText_pSioBase // get address of SIO base -> R6
|
||||
str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate
|
||||
ldrb r2,[r4,#SSEGM_PAR3+1] // font height -> R2
|
||||
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height
|
||||
|
||||
// - now we must wait at least 8 clock cycles to get result of division
|
||||
|
||||
// [6] get wrap width -> [SP+36]
|
||||
ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width
|
||||
movs r7,#3 // [1] mask to align to 32-bit
|
||||
bics r5,r7 // [1] align wrap
|
||||
str r5,[sp,#36] // [2] save wrap width
|
||||
|
||||
// [1] align X coordinate to 32-bit
|
||||
bics r1,r7 // [1]
|
||||
|
||||
// [3] align remaining width
|
||||
bics r3,r7 // [1]
|
||||
str r3,[sp,#12] // [2] save new width
|
||||
|
||||
// load result of division Y/font_height -> R5 Y relative to row, R2 index of row
|
||||
// Note: QUOTIENT must be read last
|
||||
ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row
|
||||
ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row
|
||||
|
||||
// pointer to font line -> R3
|
||||
lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long)
|
||||
ldr r3,[r4,#SSEGM_PAR] // get pointer to font
|
||||
add r3,r5 // line offset + font base -> pointer to current font line R3
|
||||
mov r8,r3
|
||||
|
||||
// base pointer to text data (without X) -> [SP+8], R2
|
||||
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
|
||||
muls r2,r5 // Y * WB -> offset of row in text buffer
|
||||
ldr r5,[r4,#SSEGM_DATA] // pointer to data
|
||||
add r2,r5 // base address of text buffer
|
||||
str r2,[sp,#8] // save pointer to text buffer
|
||||
|
||||
// base pointer to gradient array -> [SP+4], R3
|
||||
ldr r3,[r4,#SSEGM_PAR2] // pointer to gradient array
|
||||
str r3,[sp,#4] // save pointer to gradient array
|
||||
|
||||
// prepare pointer to text data with X -> R2
|
||||
lsrs r6,r1,#3 // convert X to gradient index
|
||||
lsls r6,#2 // round to 4-bytes
|
||||
add r3,r6 // pointer to source gradient array
|
||||
lsrs r6,r1,#4 // convert X to character index (1 character is 16 pixels width)
|
||||
add r2,r6 // pointer to source text buffer -> R2
|
||||
|
||||
// prepare background color, expand to 32 bits -> R4
|
||||
ldrb r4,[r4,#SSEGM_PAR3] // load background color
|
||||
lsls r5,r4,#8 // shift background color << 8
|
||||
orrs r5,r4 // color expanded to 16 bits
|
||||
lsls r4,r5,#16 // shift 16-bit color << 16
|
||||
orrs r4,r5 // color expanded to 32 bits
|
||||
|
||||
// prepare pointer to conversion table -> LR
|
||||
ldr r5,RenderDText_Addr // get pointer to conversion table -> R5
|
||||
mov lr,r5 // conversion table -> LR
|
||||
|
||||
// ---- render 2nd half of first character
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate
|
||||
// R2 ... pointer to source text buffer
|
||||
// R3 ... pointer to gradient array
|
||||
// R4 ... background color (expanded to 32-bit)
|
||||
// R5..R7 ... (temporary)
|
||||
// R8 ... pointer to font line
|
||||
// LR ... pointer to conversion table
|
||||
// [SP+4] ... base pointer to gradient array
|
||||
// [SP+8] ... base pointer to text data (without X)
|
||||
// [SP+12] ... remaining width
|
||||
// [SP+36] ... wrap width
|
||||
|
||||
// check if X is aligned
|
||||
lsls r6,r1,#(32-4) // check if X is aligned
|
||||
beq 2f // X is aligned to a character (low 4 bits are zero), skip the partial first character
|
||||
|
||||
// shift X coordinate
|
||||
lsrs r5,r6,#(32-4) // [1] X pixel offset in last character -> R5
|
||||
movs r6,16 // character width
|
||||
subs r6,r5 // pixels remain
|
||||
adds r1,r6 // shift X coordinate (align to next character)
|
||||
ldr r7,[sp,#12]
|
||||
subs r7,r6 // shift width
|
||||
str r7,[sp,#12]
|
||||
|
||||
push {r1}
|
||||
|
||||
// [6] load font sample -> R7
|
||||
ldrb r7,[r2,#0] // [2] load character from source text buffer -> R7
|
||||
adds r2,#1 // [1] shift pointer to source text buffer
|
||||
add r7,r8 // [1] pointer to font line
|
||||
ldrb r7,[r7] // [2] load font sample -> R7
|
||||
|
||||
// [3] load foreground color, XOR with background -> R6
|
||||
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [2] prepare conversion table -> R7
|
||||
lsls r7,#3 // [1] multiply font sample * 8
|
||||
add r7,lr // [1] add pointer to conversion table
|
||||
|
||||
// [4] convert first 4 pixels (higher 4 bits) -> R1
|
||||
ldr r1,[r7,#0] // [2] load mask for higher 4 bits
|
||||
ands r1,r6 // [1] mask foreground color
|
||||
eors r1,r4 // [1] combine with background color
|
||||
|
||||
cmp r5,#4 // check start position of X
|
||||
bhi 3f // > 4
|
||||
|
||||
// [20] store 8 pixels
|
||||
lsrs r1,#16 // [1]
|
||||
strb r1,[r0,#0] // [2]
|
||||
strb r1,[r0,#1] // [2]
|
||||
lsrs r1,#8 // [1]
|
||||
strb r1,[r0,#2] // [2]
|
||||
strb r1,[r0,#3] // [2]
|
||||
adds r0,#4 // [1]
|
||||
|
||||
// [3] load foreground color, XOR with background -> R6
|
||||
3: ldmia r3!,{r6} // [2] load foreground color from gradient buffer
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [4] convert second 4 pixels (lower 4 bits)
|
||||
ldr r1,[r7,#4] // [2] load mask for lower 4 bits
|
||||
ands r1,r6 // [1] mask foreground color
|
||||
eors r1,r4 // [1] combine with background color
|
||||
|
||||
// store 8 pixels
|
||||
cmp r5,#8 // check start position of X
|
||||
bhi 4f // > 8
|
||||
|
||||
strb r1,[r0,#0] // [2]
|
||||
strb r1,[r0,#1] // [2]
|
||||
lsrs r1,#8 // [1]
|
||||
strb r1,[r0,#2] // [2]
|
||||
strb r1,[r0,#3] // [2]
|
||||
lsls r1,#8
|
||||
adds r0,#4
|
||||
|
||||
4: lsrs r1,#16 // [1]
|
||||
strb r1,[r0,#0] // [2]
|
||||
strb r1,[r0,#1] // [2]
|
||||
lsrs r1,#8 // [1]
|
||||
strb r1,[r0,#2] // [2]
|
||||
strb r1,[r0,#3] // [2]
|
||||
adds r0,#4 // [1]
|
||||
|
||||
pop {r1}
|
||||
|
||||
// check end of segment
|
||||
ldr r7,[sp,#36] // load wrap width
|
||||
cmp r1,r7 // end of segment?
|
||||
blo 2f
|
||||
movs r1,#0 // reset X coordinate
|
||||
ldr r2,[sp,#8] // get base pointer to text data -> R2
|
||||
ldr r3,[sp,#4] // get base pointer to gradient array -> R3
|
||||
|
||||
// prepare wrap width - start X -> R7
|
||||
2: ldr r7,[sp,#36] // load wrap width
|
||||
subs r7,r1 // pixels remaining to end of segment
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// Outer loop variables (* prepared before outer loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... number of characters to generate in one part of segment
|
||||
// R2 ... *pointer to source text buffer
|
||||
// R3 ... *pointer to gradient array
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... (temporary)
|
||||
// R7 ... *wrap width of this segment, later: temporary
|
||||
// R8 ... *pointer to font line
|
||||
// LR ... *pointer to conversion table
|
||||
// [SP+4] ... base pointer to gradient array
|
||||
// [SP+8] ... base pointer to text data (without X)
|
||||
// [SP+12] ... remaining width
|
||||
// [SP+36] ... wrap width
|
||||
|
||||
RenderDText_OutLoop:
|
||||
|
||||
// limit wrap width by total width -> R7
|
||||
ldr r6,[sp,#12] // get remaining width
|
||||
cmp r7,r6 // compare with wrap width
|
||||
bls 2f // width is OK
|
||||
mov r7,r6 // limit wrap width
|
||||
|
||||
// check if remain whole characters
|
||||
2: cmp r7,#16 // check number of remaining pixels
|
||||
bhs 5f // enough characters remain
|
||||
|
||||
// check if 1st part of last character remains
|
||||
cmp r7,#4 // check 1st part of last character
|
||||
blo 3f // all done
|
||||
mov r1,r7 // width to render
|
||||
|
||||
// ---- render 1st part of last character
|
||||
|
||||
RenderDText_Last:
|
||||
|
||||
push {r7}
|
||||
|
||||
// [6] load font sample -> R7
|
||||
ldrb r7,[r2,#0] // [2] load character from source text buffer -> R7
|
||||
adds r2,#1 // [1] shift pointer to source text buffer
|
||||
add r7,r8 // [1] pointer to font line
|
||||
ldrb r7,[r7] // [2] load font sample -> R7
|
||||
|
||||
// [3] load foreground color, XOR with background -> R6
|
||||
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [2] prepare conversion table -> R7
|
||||
lsls r7,#3 // [1] multiply font sample * 8
|
||||
add r7,lr // [1] add pointer to conversion table
|
||||
|
||||
// [4] convert first 4 pixels (higher 4 bits) -> R5
|
||||
ldr r5,[r7,#0] // [2] load mask for higher 4 bits
|
||||
ands r5,r6 // [1] mask foreground color
|
||||
eors r5,r4 // [1] combine with background color
|
||||
|
||||
// [20] store 8 pixels
|
||||
strb r5,[r0,#0] // [2]
|
||||
strb r5,[r0,#1] // [2]
|
||||
lsrs r5,#8 // [1]
|
||||
strb r5,[r0,#2] // [2]
|
||||
strb r5,[r0,#3] // [2]
|
||||
adds r0,#4 // [1]
|
||||
|
||||
cmp r1,#4
|
||||
bls 4f
|
||||
|
||||
lsrs r5,#8 // [1]
|
||||
strb r5,[r0,#0] // [2]
|
||||
strb r5,[r0,#1] // [2]
|
||||
lsrs r5,#8 // [1]
|
||||
strb r5,[r0,#2] // [2]
|
||||
strb r5,[r0,#3] // [2]
|
||||
adds r0,#4 // [1]
|
||||
|
||||
cmp r1,#8
|
||||
bls 4f
|
||||
|
||||
// [3] load foreground color, XOR with background -> R6
|
||||
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [4] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r7,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
|
||||
// [20] store 8 pixels
|
||||
strb r7,[r0,#0] // [2]
|
||||
strb r7,[r0,#1] // [2]
|
||||
lsrs r7,#8 // [1]
|
||||
strb r7,[r0,#2] // [2]
|
||||
strb r7,[r0,#3] // [2]
|
||||
adds r0,#4 // [1]
|
||||
|
||||
// check if continue with next segment
|
||||
4: pop {r7}
|
||||
|
||||
ldr r2,[sp,#8] // get base pointer to text data -> R2
|
||||
ldr r3,[sp,#4] // get base pointer to gradient array -> R3
|
||||
cmp r7,#16
|
||||
bhs RenderDText_OutLoop
|
||||
|
||||
// pop registers and return
|
||||
3: pop {r4}
|
||||
mov r8,r4
|
||||
pop {r1-r7,pc}
|
||||
|
||||
// ---- prepare to render whole characters
|
||||
|
||||
// prepare number of whole characters to render -> R1
|
||||
5: lsrs r1,r7,#2 // shift to get number of characters*2
|
||||
lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5
|
||||
subs r6,r5 // get remaining width
|
||||
str r6,[sp,#12] // save new remaining width
|
||||
subs r1,#3 // number of characters*2 - 3
|
||||
|
||||
// ---- [65*N-1] start inner loop, render characters in one part of segment
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *number of characters to generate*2 - 3 (loop counter)
|
||||
// R2 ... *pointer to source text buffer
|
||||
// R3 ... *pointer to gradient array
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... foreground color
|
||||
// R7 ... font sample
|
||||
// R8 ... *pointer to font line
|
||||
// LR ... *pointer to conversion table
|
||||
// [SP+4] ... base pointer to gradient array
|
||||
// [SP+8] ... base pointer to text data (without X)
|
||||
// [SP+12] ... remaining width
|
||||
// [SP+36] ... wrap width
|
||||
|
||||
RenderDText_InLoop:
|
||||
|
||||
// [6] load font sample -> R7
|
||||
ldrb r7,[r2,#0] // [2] load character from source text buffer -> R7
|
||||
adds r2,#1 // [1] shift pointer to source text buffer
|
||||
add r7,r8 // [1] pointer to font line
|
||||
ldrb r7,[r7] // [2] load font sample -> R7
|
||||
|
||||
// [3] load foreground color, XOR with background -> R6
|
||||
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [2] prepare conversion table -> R7
|
||||
lsls r7,#3 // [1] multiply font sample * 8
|
||||
add r7,lr // [1] add pointer to conversion table
|
||||
|
||||
// [4] convert first 4 pixels (higher 4 bits) -> R5
|
||||
ldr r5,[r7,#0] // [2] load mask for higher 4 bits
|
||||
ands r5,r6 // [1] mask foreground color
|
||||
eors r5,r4 // [1] combine with background color
|
||||
|
||||
// [20] store 8 pixels
|
||||
strb r5,[r0,#0] // [2]
|
||||
strb r5,[r0,#1] // [2]
|
||||
lsrs r5,#8 // [1]
|
||||
strb r5,[r0,#2] // [2]
|
||||
strb r5,[r0,#3] // [2]
|
||||
lsrs r5,#8 // [1]
|
||||
strb r5,[r0,#4] // [2]
|
||||
strb r5,[r0,#5] // [2]
|
||||
lsrs r5,#8 // [1]
|
||||
strb r5,[r0,#6] // [2]
|
||||
strb r5,[r0,#7] // [2]
|
||||
adds r0,#8 // [1]
|
||||
|
||||
// [3] load foreground color, XOR with background -> R6
|
||||
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [4] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r7,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
|
||||
// [20] store 8 pixels
|
||||
strb r7,[r0,#0] // [2]
|
||||
strb r7,[r0,#1] // [2]
|
||||
lsrs r7,#8 // [1]
|
||||
strb r7,[r0,#2] // [2]
|
||||
strb r7,[r0,#3] // [2]
|
||||
lsrs r7,#8 // [1]
|
||||
strb r7,[r0,#4] // [2]
|
||||
strb r7,[r0,#5] // [2]
|
||||
lsrs r7,#8 // [1]
|
||||
strb r7,[r0,#6] // [2]
|
||||
strb r7,[r0,#7] // [2]
|
||||
adds r0,#8 // [1]
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r1,#4 // [1] shift loop counter
|
||||
bhi RenderDText_InLoop // [1,2] > 0, render next whole character
|
||||
|
||||
// ---- end inner loop, continue with last character, or start new part
|
||||
|
||||
// continue to outer loop
|
||||
ldr r7,[sp,#36] // load wrap width
|
||||
adds r1,#3 // return size of last tile
|
||||
lsls r1,#2 // convert back to pixels
|
||||
bne RenderDText_Last // render 1st half of last character
|
||||
ldr r2,[sp,#8] // get base pointer to text data -> R2
|
||||
ldr r3,[sp,#4] // get base pointer to gradient array -> R3
|
||||
b RenderDText_OutLoop // go back to outer loop
|
||||
|
||||
.align 2
|
||||
RenderDText_Addr:
|
||||
.word RenderTextMask
|
||||
RenderDText_pSioBase:
|
||||
.word SIO_BASE // addres of SIO base
|
||||
|
|
@ -1,160 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render LAYERMODE_FASTSPRITE*
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" u32* RenderFastSprite(u32* cbuf, int y, sLayer* scr, u8* buf)
|
||||
|
||||
// render layers with fast sprites LAYERMODE_FASTSPRITE*
|
||||
// R0 ... cbuf pointer to control buffer
|
||||
// R1 ... y coordinate of scanline
|
||||
// R2 ... scr pointer to layer screen structure sLayer
|
||||
// R3 ... buf pointer to destination data buffer with transparent color
|
||||
// Output new pointer to control buffer.
//
// The routine does not copy pixels. For the given scanline it appends entries
// to the control buffer, each entry being two 32-bit words:
//   [number of 32-bit words] [source address]
// Gaps between sprites (and the rest of the line) point to 'buf', sprite
// runs point directly into the sprite image line.
// Sprites are taken in list order and X0 only advances: a sprite that starts
// left of the current X0 is clipped on its left side, a sprite with nothing
// left to draw is skipped.
// NOTE(review): this presumably expects the sprite list to be ordered by X
// coordinate - confirm against the caller.
|
||||
|
||||
.thumb_func
|
||||
.global RenderFastSprite
|
||||
RenderFastSprite:
|
||||
|
||||
// push registers
|
||||
push {r1-r7,lr}
|
||||
|
||||
// Stack content and input variables:
|
||||
// R0 cbuf pointer to control buffer
|
||||
// SP+0: R1 Y coordinate of scanline
|
||||
// SP+4: R2 scr pointer to layer screen structure sLayer, later: num number of sprites
|
||||
// SP+8: R3 buf pointer to data buffer with transparent color
|
||||
// SP+12: R4
|
||||
// SP+16: R5
|
||||
// SP+20: R6
|
||||
// SP+24: R7
|
||||
// SP+28: LR
|
||||
|
||||
// Variables:
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X0 absolute coordinate counted from start
|
||||
// R2 ... W layer screen width
|
||||
// R3 ... s pointer to current sprite, later: absolute X coordinate of start of sprite
|
||||
// R4 ... Y2 coordinate relative to sprite base, later: s->img[Y2*WB] address of sprite line
|
||||
// R5 ... relative X2 coordinate of sprite segment
|
||||
// R6 ... W2 width of sprite segment
|
||||
// R7 ... (temporary)
|
||||
// LR ... spr pointer to list of sprites
|
||||
// [SP+0] ... (R1) Y coordinate of scanline
|
||||
// [SP+4] ... (R2) num number of sprites (loop counter)
|
||||
// [SP+8] ... (R3) buf pointer to data buffer with transparent color
|
||||
|
||||
// load pointer to list of sprites -> LR
// (LR is used as a plain variable here; the return address is on the stack)
|
||||
ldr r7,[r2,#SLAYER_IMG]
|
||||
mov lr,r7
|
||||
|
||||
// load number of sprites -> [SP+4]
// (overwrites the saved R2 slot, the sLayer pointer is not needed on the stack)
|
||||
ldrh r7,[r2,#SLAYER_SPRITENUM]
|
||||
str r7,[sp,#4]
|
||||
|
||||
// load screen width -> R2
|
||||
ldrh r2,[r2,#SLAYER_W]
|
||||
|
||||
// reset absolute coordinate X0 -> R1
|
||||
movs r1,#0 // R1 <- 0
|
||||
|
||||
// count number of sprites, end if num = 0
|
||||
2: ldr r7,[sp,#4] // get number of sprites
|
||||
subs r7,#1 // decrement number of sprites
|
||||
blo 8f // borrow (counter was 0): no other sprites
|
||||
str r7,[sp,#4] // save new number of sprites
|
||||
|
||||
// get pointer to next sprite -> R3
|
||||
mov r7,lr // pointer to list of sprites -> R7
|
||||
ldmia r7!,{r3} // pointer to sprite -> R3
|
||||
mov lr,r7 // save new pointer to list of sprites -> LR
|
||||
|
||||
// prepare Y2 coordinate relative to sprite base -> R4
|
||||
ldrh r7,[r3,#SSPRITE_Y] // get Y coordinate of the sprite -> R7
|
||||
sxth r7,r7 // sign-extend sprite Y coordinate (s->y is signed 16-bit)
|
||||
ldr r4,[sp,#0] // Y coordinate of scanline -> R4
|
||||
subs r4,r7 // relative coordinate Y2 = Y - s->y
|
||||
|
||||
// check if Y2 coordinate is valid
|
||||
bmi 2b // Y2 < 0, go next sprite
|
||||
ldrh r7,[r3,#SSPRITE_H] // get sprite height
|
||||
cmp r4,r7 // check sprite height
|
||||
bge 2b // Y2 >= s->h, go next sprite
|
||||
|
||||
// get relative start X2 coordinate of this line segment -> R5
// (table holds one byte per sprite line, in units of 4 pixels)
|
||||
ldr r7,[r3,#SSPRITE_X0] // get table of X0 of lines
|
||||
ldrb r5,[r7,r4] // get X2 coordinate of line Y2 -> R5
|
||||
lsls r5,#2 // convert X2 coordinate to byte offset
|
||||
|
||||
// get width W2 of this line segment -> R6
// (table holds one byte per sprite line, in units of 4 pixels)
|
||||
ldr r7,[r3,#SSPRITE_W0] // get table of W0 of lines
|
||||
ldrb r6,[r7,r4] // get W2 width of line Y2 -> R6
|
||||
lsls r6,#2 // convert W2 width to bytes
|
||||
|
||||
// get address of sprite line s->img[Y2*s->wb] -> R4
|
||||
ldrh r7,[r3,#SSPRITE_WB] // get sprite pitch s->wb
|
||||
muls r4,r4,r7 // sprite offset Y2*s->wb
|
||||
ldr r7,[r3,#SSPRITE_IMG] // get sprite image
|
||||
add r4,r7 // line address -> R4
|
||||
|
||||
// get absolute X coordinate of start of line -> R3
// (R3 stops being the sprite pointer from here on)
|
||||
ldrh r3,[r3,#SSPRITE_X] // get sprite X coordinate -> R3
|
||||
sxth r3,r3 // sign-extend X (s->x is signed 16-bit)
|
||||
adds r3,r3,r5 // s->X + X2, X coordinate of start of line -> R3
|
||||
|
||||
// check if sprite coordinate X lies below current X0 coordinate
|
||||
subs r7,r1,r3 // difference X0 - X -> R7
|
||||
ble 3f // X0 <= X, sprite does not lie below current X0
|
||||
|
||||
// sprite correction (clip the part of the segment left of X0)
|
||||
adds r5,r7 // X2 += X0 - X
|
||||
subs r6,r7 // W2 -= X0 - X
|
||||
mov r3,r1 // X = X0
|
||||
|
||||
// check line length W2 (clip the segment at the right edge of the layer)
|
||||
3: subs r7,r2,r3 // W - X -> R7
|
||||
cmp r6,r7 // compare W2 with W - X
|
||||
ble 4f // W2 <= W - X, length is OK
|
||||
mov r6,r7 // limit segment width W2 -> R6
|
||||
|
||||
// align to word
// NOTE(review): BICS updates only N and Z, so the BLE below evaluates them
// together with the V flag left by 'cmp r6,r7' above - confirm this is intended.
|
||||
4: movs r7,#3 // mask to word
|
||||
bics r3,r7 // align X
|
||||
bics r5,r7 // align X2
|
||||
bics r6,r7 // align W2
|
||||
ble 2b // no W2 left (W2 <= 0), go next sprite
|
||||
|
||||
// decode space before sprite
// (emit a run of transparent words to fill the gap between X0 and X)
|
||||
subs r7,r3,r1 // X - X0 -> R7
|
||||
ble 5f // no space left before sprite
|
||||
lsrs r7,#2 // number of words (X - X0)/4
|
||||
stmia r0!,{r7} // write number of words
|
||||
ldr r7,[sp,#8] // pointer to data buffer -> R7
|
||||
stmia r0!,{r7} // write address
|
||||
mov r1,r3 // shift X0
|
||||
|
||||
// write sprite line
|
||||
5: adds r7,r4,r5 // address of pixel &s->img[y2*s->wb+x2] -> R7
|
||||
lsrs r4,r6,#2 // W2/4 line length in words -> R4
|
||||
stmia r0!,{r4,r7} // write sprite length and address
|
||||
adds r1,r6 // add X0 += W2
|
||||
b 2b // next sprite
|
||||
|
||||
// clear rest of scanline
// (emit one final run of transparent words from X0 to the layer width)
|
||||
8: subs r2,r1 // subtract W - X0
|
||||
bls 9f // no pixels left
|
||||
lsrs r2,#2 // (W - X0)/4
|
||||
ldr r3,[sp,#8] // pointer to data buffer -> R3
|
||||
stmia r0!,{r2,r3} // write number of words and address of data buffer
|
||||
|
||||
// pop registers and return
// (R0 is not restored: it carries the new control buffer pointer to the caller)
|
||||
9: pop {r1-r7,pc}
|
||||
|
|
@ -1,313 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_FTEXT
|
||||
//
|
||||
// ****************************************************************************
|
||||
// u32 par SSEGM_PAR pointer to the font
|
||||
// u32 par2 SSEGM_PAR2 background color
|
||||
// u16 par3 font height
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// render font pixel mask
|
||||
.extern RenderTextMask // u32 RenderTextMask[512];
|
||||
|
||||
// extern "C" u8* RenderFText(u8* dbuf, int x, int y, int w, sSegm* segm)
|
||||
|
||||
// render 8-pixel foreground color text GF_FTEXT
|
||||
// R0 ... destination data buffer
|
||||
// R1 ... start X coordinate (in pixels, must be multiple of 4)
|
||||
// R2 ... start Y coordinate (in graphics lines)
|
||||
// R3 ... width to display (must be multiple of 4 and > 0)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to destination data buffer.
|
||||
// 320 pixels takes 8.7 us on 151 MHz.
//
// Data layout as used by the code below:
//  - text buffer: 2 bytes per character position (character code, foreground color)
//  - font: 256 bytes per font line, indexed by character code
//  - RenderTextMask: 8 bytes per font sample = 32-bit pixel mask of the higher
//    4 bits (offset 0) followed by the mask of the lower 4 bits (offset 4)
// Pixels are produced as background ^ (mask & (foreground ^ background)),
// 4 pixels (one 32-bit word) per store.
// The text row wraps around at the segment wrap width (SSEGM_WRAPX).
|
||||
|
||||
.thumb_func
|
||||
.global RenderFText
|
||||
RenderFText:
|
||||
|
||||
// push registers
|
||||
push {r1-r7,lr}
|
||||
|
||||
// Stack content:
|
||||
// SP+0: R1 start X coordinate
|
||||
// SP+4: R2 start Y coordinate (later: base pointer to text data row)
|
||||
// SP+8: R3 width to display
|
||||
// SP+12: R4
|
||||
// SP+16: R5
|
||||
// SP+20: R6
|
||||
// SP+24: R7
|
||||
// SP+28: LR
|
||||
// SP+32: video segment (later: wrap width in X direction)
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#32] // load video segment -> R4
|
||||
|
||||
// start divide Y/font height
|
||||
ldr r6,RenderFText_pSioBase // get address of SIO base -> R6
|
||||
str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate
|
||||
ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2
|
||||
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height
|
||||
|
||||
// - now we must wait at least 8 clock cycles to get result of division
// (the following independent setup work fills the waiting time)
|
||||
|
||||
// [6] get wrap width -> [SP+32]
|
||||
ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width
|
||||
movs r7,#3 // [1] mask to align to 32-bit
|
||||
bics r5,r7 // [1] align wrap
|
||||
str r5,[sp,#32] // [2] save wrap width
|
||||
|
||||
// [1] align X coordinate to 32-bit
|
||||
bics r1,r7 // [1]
|
||||
|
||||
// [3] align remaining width
|
||||
bics r3,r7 // [1]
|
||||
str r3,[sp,#8] // [2] save new width
|
||||
|
||||
// load result of division Y/font_height -> R5 Y relative to row, R2 index of row
|
||||
// Note: QUOTIENT must be read last
|
||||
ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row
|
||||
ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row
|
||||
|
||||
// pointer to font line -> R3
|
||||
lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long)
|
||||
ldr r3,[r4,#SSEGM_PAR] // get pointer to font
|
||||
add r3,r5 // line offset + font base -> pointer to current font line R3
|
||||
|
||||
// base pointer to text data (without X) -> [SP+4], R2
|
||||
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
|
||||
muls r2,r5 // row index * WB -> offset of row in text buffer
|
||||
ldr r5,[r4,#SSEGM_DATA] // pointer to data
|
||||
add r2,r5 // base address of text buffer
|
||||
str r2,[sp,#4] // save pointer to text buffer
|
||||
|
||||
// prepare pointer to text data with X -> R2
|
||||
lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width)
|
||||
lsls r6,#1 // convert to character offset (1 position is: 1 character + 1 color)
|
||||
add r2,r6 // pointer to source text buffer -> R2
|
||||
|
||||
// prepare background color, expand to 32 bits -> R4
// (R4 stops being the segment pointer from here on)
|
||||
ldrb r4,[r4,#SSEGM_PAR2] // load background color
|
||||
lsls r5,r4,#8 // shift background color << 8
|
||||
orrs r5,r4 // color expanded to 16 bits
|
||||
lsls r4,r5,#16 // shift 16-bit color << 16
|
||||
orrs r4,r5 // color expanded to 32 bits
|
||||
|
||||
// prepare pointer to conversion table -> LR
// (LR is used as a plain variable here; the return address is on the stack)
|
||||
ldr r5,RenderFText_Addr // get pointer to conversion table -> R5
|
||||
mov lr,r5 // conversion table -> LR
|
||||
|
||||
// ---- render 2nd half of first character
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate
|
||||
// R2 ... pointer to source text buffer
|
||||
// R3 ... pointer to font line
|
||||
// R4 ... background color (expanded to 32-bit)
|
||||
// R5..R7 ... (temporary)
|
||||
// LR ... pointer to conversion table
|
||||
// [SP+4] ... base pointer to text data (without X)
|
||||
// [SP+8] ... remaining width
|
||||
// [SP+32] ... wrap width
|
||||
|
||||
// check bit 2 of X coordinate - check if image starts with 2nd half of first character
|
||||
lsls r6,r1,#29 // move bit 2 of X coordinate to the sign bit (N flag)
|
||||
bpl 2f // bit 2 not set, starting even 4-pixels
|
||||
|
||||
// [4] load font sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
|
||||
ldrb r5,[r3,r5] // [2] load font sample -> R5
|
||||
|
||||
// [3] load foreground color -> R6
|
||||
ldrb r6,[r2,#1] // [2] load foreground color from source text buffer -> R6
|
||||
adds r2,#2 // [1] shift pointer to source text buffer
|
||||
|
||||
// [4] expand foreground color to 32-bit -> R6
|
||||
lsls r7,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r7,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r7,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r7 // [1] color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply font sample * 8
|
||||
add r5,lr // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store second 4 pixels
|
||||
|
||||
// shift X coordinate
|
||||
adds r1,#4 // shift X coordinate
|
||||
|
||||
// check end of segment
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
cmp r1,r7 // end of segment?
|
||||
blo 1f
|
||||
movs r1,#0 // reset X coordinate
|
||||
ldr r2,[sp,#4] // get base pointer to text data -> R2
|
||||
|
||||
// shift remaining width
|
||||
1: ldr r7,[sp,#8] // get remaining width
|
||||
subs r7,#4 // shift width
|
||||
str r7,[sp,#8] // save new width
|
||||
|
||||
// prepare wrap width - start X -> R7
|
||||
2: ldr r7,[sp,#32] // load wrap width
|
||||
subs r7,r1 // pixels remaining to end of segment
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// Outer loop variables (* prepared before outer loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... number of characters to generate in one part of segment
|
||||
// R2 ... *pointer to source text buffer
|
||||
// R3 ... *pointer to font line
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... (temporary)
|
||||
// R7 ... *wrap width of this segment, later: temporary
|
||||
// LR ... *pointer to conversion table
|
||||
// [SP+4] ... *base pointer to text data (without X)
|
||||
// [SP+8] ... *remaining width
|
||||
// [SP+32] ... *wrap width
|
||||
|
||||
RenderFText_OutLoop:
|
||||
|
||||
// limit wrap width by total width -> R7
|
||||
ldr r6,[sp,#8] // get remaining width
|
||||
cmp r7,r6 // compare with wrap width
|
||||
bls 2f // width is OK
|
||||
mov r7,r6 // limit wrap width
|
||||
|
||||
// check if remain whole characters
|
||||
2: cmp r7,#8 // check number of remaining pixels
|
||||
bhs 5f // enough characters remain
|
||||
|
||||
// check if 1st part of last character remains
|
||||
cmp r7,#4 // check 1st part of last character
|
||||
blo 3f // all done
|
||||
|
||||
// ---- render 1st part of last character
// Entered either by fall-through from the check above (R7 = pixels to render)
// or from the end of the inner loop (R7 = wrap width).
// R1 is no longer needed as a counter here and is used as a temporary.
|
||||
|
||||
RenderFText_Last:
|
||||
|
||||
// [4] load font sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
|
||||
ldrb r5,[r3,r5] // [2] load font sample -> R5
|
||||
|
||||
// [3] load foreground color -> R6
|
||||
ldrb r6,[r2,#1] // [2] load foreground color from source text buffer -> R6
|
||||
adds r2,#2 // [1] shift pointer to source text buffer
|
||||
|
||||
// [4] expand foreground color to 32-bit
|
||||
lsls r1,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r1,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r1,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r1 // [1] color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply font sample * 8
|
||||
add r5,lr // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert first 4 pixels (higher 4 bits)
|
||||
ldr r1,[r5,#0] // [2] load mask for higher 4 bits
|
||||
ands r1,r6 // [1] mask foreground color
|
||||
eors r1,r4 // [1] combine with background color
|
||||
stmia r0!,{r1} // [2] store first 4 pixels
|
||||
|
||||
// check if continue with next segment
// NOTE(review): on the fall-through entry R7 is below 8, so the branch is not
// taken and the function returns even if [SP+8] still holds remaining width;
// this looks like it relies on the wrap width being a multiple of 8 - confirm.
|
||||
ldr r2,[sp,#4] // get base pointer to text data -> R2
|
||||
cmp r7,#4
|
||||
bhi RenderFText_OutLoop
|
||||
|
||||
// pop registers and return
// (R0 is not restored: it carries the new destination pointer to the caller)
|
||||
3: pop {r1-r7,pc}
|
||||
|
||||
// ---- prepare to render whole characters
|
||||
|
||||
// prepare number of whole characters to render -> R1
// (on entry R7 = pixels to render in this part, R6 = remaining width)
|
||||
5: lsrs r1,r7,#2 // shift to get number of characters*2
|
||||
lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5
|
||||
subs r6,r5 // get remaining width
|
||||
str r6,[sp,#8] // save new remaining width
|
||||
subs r1,#1 // number of characters*2 - 1
|
||||
|
||||
// ---- [29*N-1] start inner loop, render characters in one part of segment
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *number of characters to generate*2 - 1 (loop counter)
|
||||
// R2 ... *pointer to source text buffer
|
||||
// R3 ... *pointer to font line
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... font sample
|
||||
// R6 ... foreground color (expanded to 32-bit)
|
||||
// R7 ... (temporary)
|
||||
// LR ... *pointer to conversion table
|
||||
|
||||
RenderFText_InLoop:
|
||||
|
||||
// [4] load font sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
|
||||
ldrb r5,[r3,r5] // [2] load font sample -> R5
|
||||
|
||||
// [3] load foreground color -> R6
|
||||
ldrb r6,[r2,#1] // [2] load foreground color from source text buffer -> R6
|
||||
adds r2,#2 // [1] shift pointer to source text buffer
|
||||
|
||||
// [4] expand foreground color to 32-bit
|
||||
lsls r7,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r7,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r7,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r7 // [1] color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply font sample * 8
|
||||
add r5,lr // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert first 4 pixels (higher 4 bits)
|
||||
ldr r7,[r5,#0] // [2] load mask for higher 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store first 4 pixels
|
||||
|
||||
// [6] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store second 4 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r1,#2 // [1] shift loop counter
|
||||
bhi RenderFText_InLoop // [1,2] > 0, render next whole character
|
||||
|
||||
// ---- end inner loop, continue with last character, or start new part
|
||||
|
||||
// continue to outer loop
// (LDR does not change the flags, so BEQ below still tests the result of
// 'subs r1,#2': zero means one 4-pixel half of a character is left)
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
beq RenderFText_Last // render 1st half of last character
|
||||
ldr r2,[sp,#4] // get base pointer to text data -> R2
|
||||
b RenderFText_OutLoop // go back to outer loop
|
||||
|
||||
// literal pool: addresses loaded PC-relative by the code above
.align 2
|
||||
RenderFText_Addr:
|
||||
.word RenderTextMask
|
||||
RenderFText_pSioBase:
|
||||
.word SIO_BASE // address of SIO base
|
||||
|
|
@ -1,258 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_GRAPH1
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// render font pixel mask
|
||||
.extern RenderTextMask // u32 RenderTextMask[512];
|
||||
|
||||
// extern "C" u8* RenderGraph1(u8* dbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render 1-bit palette graphics GF_GRAPH1
|
||||
// dbuf ... destination data buffer
|
||||
// x ... start X coordinate (must be multiple of 4)
|
||||
// y ... start Y coordinate
|
||||
// w ... width of this segment (must be multiple of 4)
|
||||
// segm ... video segment
|
||||
// Output new dbuf pointer.
|
||||
// 320 pixels takes 6 us on 151 MHz.
|
||||
|
||||
.thumb_func
|
||||
.global RenderGraph1
|
||||
RenderGraph1:
|
||||
|
||||
// push registers
|
||||
push {r3-r7,lr}
|
||||
|
||||
// Input registers and stack content:
|
||||
// R0 ... destination data buffer
|
||||
// R1 ... start X coordinate
|
||||
// R2 ... start Y coordinate
|
||||
// SP+0: R3 width to display
|
||||
// SP+4: R4
|
||||
// SP+8: R5
|
||||
// SP+12: R6
|
||||
// SP+16: R7
|
||||
// SP+20: LR
|
||||
// SP+24: video segment (later: wrap width in X direction)
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#24] // load video segment -> R4
|
||||
|
||||
// get wrap width -> [SP+24]
|
||||
ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width
|
||||
movs r7,#3 // mask to align to 32-bit
|
||||
bics r5,r7 // align wrap
|
||||
str r5,[sp,#24] // save wrap width
|
||||
|
||||
// align X coordinate to 32-bit -> R1
|
||||
bics r1,r7
|
||||
|
||||
// align remaining width -> [SP+0]
|
||||
bics r3,r7
|
||||
str r3,[sp,#0] // save new width
|
||||
|
||||
// base pointer to image data (without X) -> LR
|
||||
ldrh r5,[r4,#SSEGM_WB] // get pitch of lines
|
||||
muls r2,r5 // Y * WB -> offset of row in text buffer
|
||||
ldr r5,[r4,#SSEGM_DATA] // pointer to data
|
||||
add r2,r5 // base address of text buffer
|
||||
mov lr,r2 // save pointer to text buffer
|
||||
|
||||
// prepare pointer to image data with X -> R2
|
||||
lsrs r2,r1,#3 // convert X to character index (1 character is 8 pixels width)
|
||||
add r2,lr // pointer to source text buffer -> R2
|
||||
|
||||
// prepare foreground color, expand to 32-bit -> R6
|
||||
ldrb r6,[r4,#SSEGM_PAR+1] // load foreground color
|
||||
lsls r7,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r7,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r7,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r7 // [1] color expanded to 32 bits
|
||||
|
||||
// prepare background color, expand to 32 bits -> R4
|
||||
ldrb r4,[r4,#SSEGM_PAR] // load background color
|
||||
lsls r5,r4,#8 // shift background color << 8
|
||||
orrs r5,r4 // color expanded to 16 bits
|
||||
lsls r4,r5,#16 // shift 16-bit color << 16
|
||||
orrs r4,r5 // color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// prepare pointer to conversion table -> R3
|
||||
ldr r3,RenderGraph1_Addr // get pointer to conversion table -> R3
|
||||
|
||||
// ---- render 2nd half of first character
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate
|
||||
// R2 ... current pointer to image buffer
|
||||
// R3 ... pointer to conversion table
|
||||
// R4 ... background color (expanded to 32-bit)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... foreground color (expanded to 32-bit)
|
||||
// R7 ... (temporary)
|
||||
// LR ... base pointer to image data (without X)
|
||||
// [SP+0] ... remaining width
|
||||
// [SP+24] ... wrap width
|
||||
|
||||
// check bit 2 of X coordinate - check if image starts with 2nd half of first character
|
||||
lsls r5,r1,#29 // check bit 2 of X coordinate
|
||||
bpl 2f // bit 2 not set, starting even 4-pixels
|
||||
|
||||
// [3] load image sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load image sample -> R5
|
||||
adds r2,#1 // [1] shift pointer to image buffer
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply image sample * 8
|
||||
add r5,r3 // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store second 4 pixels
|
||||
|
||||
// shift X coordinate
|
||||
adds r1,#4 // shift X coordinate
|
||||
|
||||
// check end of segment
|
||||
ldr r7,[sp,#24] // load wrap width
|
||||
cmp r1,r7 // X=end of segment?
|
||||
blo 1f
|
||||
movs r1,#0 // reset X coordinate
|
||||
mov r2,lr // get base pointer to image data -> R2
|
||||
|
||||
// shift remaining width
|
||||
1: ldr r7,[sp,#0] // get remaining width
|
||||
subs r7,#4 // shift width
|
||||
str r7,[sp,#0] // save new width
|
||||
|
||||
// prepare wrap width - start X -> R7
|
||||
2: ldr r7,[sp,#24] // load wrap width
|
||||
subs r7,r1 // pixels remaining to end of segment
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// Outer loop variables (* prepared before outer loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... number of characters to generate in one part of segment
|
||||
// R2 ... *current pointer to image buffer
|
||||
// R3 ... *pointer to conversion table
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... *foreground color (expanded to 32-bit)
|
||||
// R7 ... *wrap width of this segment, later: temporary
|
||||
// LR ... *base pointer to image data (without X)
|
||||
// [SP+0] ... *remaining width
|
||||
// [SP+24] ... *wrap width
|
||||
|
||||
RenderGraph1_OutLoop:
|
||||
|
||||
// limit wrap width by total width -> R7
|
||||
ldr r5,[sp,#0] // get remaining width
|
||||
cmp r7,r5 // compare with wrap width
|
||||
bls 2f // width is OK
|
||||
mov r7,r5 // limit wrap width
|
||||
|
||||
// check if remain whole characters
|
||||
2: cmp r7,#8 // check number of remaining pixels
|
||||
bhs 5f // enough characters remain
|
||||
|
||||
// check if 1st part of last character remains
|
||||
cmp r7,#4 // check 1st part of last character
|
||||
blo 3f // all done
|
||||
|
||||
// ---- render 1st part of last character
|
||||
|
||||
RenderGraph1_Last:
|
||||
|
||||
// [3] load image sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load image sample -> R5
|
||||
adds r2,#1 // [1] shift pointer to image buffer
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply image sample * 8
|
||||
add r5,r3 // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert first 4 pixels (higher 4 bits)
|
||||
ldr r1,[r5,#0] // [2] load mask for higher 4 bits
|
||||
ands r1,r6 // [1] mask foreground color
|
||||
eors r1,r4 // [1] combine with background color
|
||||
stmia r0!,{r1} // [2] store first 4 pixels
|
||||
|
||||
// check if continue with next segment
|
||||
mov r2,lr // get base pointer to image data -> R2
|
||||
cmp r7,#4
|
||||
bhi RenderGraph1_OutLoop
|
||||
|
||||
// pop registers and return
|
||||
3: pop {r3-r7,pc}
|
||||
|
||||
// ---- prepare to render whole characters
|
||||
|
||||
// prepare number of whole characters to render -> R1
|
||||
5: lsrs r1,r7,#2 // shift to get number of characters*2
|
||||
lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7
|
||||
subs r5,r7 // get remaining width
|
||||
str r5,[sp,#0] // save new remaining width
|
||||
subs r1,#1 // number of characters*2 - 1
|
||||
|
||||
// ---- [20*N-1] start inner loop, render characters in one part of segment
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *number of characters to generate*2 - 1 (loop counter)
|
||||
// R2 ... *current pointer to image buffer
|
||||
// R3 ... *pointer to conversion table
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... font sample
|
||||
// R6 ... *foreground color (expanded to 32-bit)
|
||||
// R7 ... (temporary)
|
||||
// LR ... *base pointer to image data (without X)
|
||||
|
||||
RenderGraph1_InLoop:
|
||||
|
||||
// [3] load image sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load image sample -> R5
|
||||
adds r2,#1 // [1] shift pointer to image buffer
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply image sample * 8
|
||||
add r5,r3 // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert first 4 pixels (higher 4 bits)
|
||||
ldr r7,[r5,#0] // [2] load mask for higher 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store first 4 pixels
|
||||
|
||||
// [6] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store second 4 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r1,#2 // [1] shift loop counter
|
||||
bhi RenderGraph1_InLoop // [1,2] > 0, render next whole character
|
||||
|
||||
// ---- end inner loop, continue with last character, or start new part
|
||||
|
||||
// continue to outer loop
|
||||
ldr r7,[sp,#24] // load wrap width
|
||||
beq RenderGraph1_Last // render 1st half of last character
|
||||
mov r2,lr // get base pointer to image data -> R2
|
||||
b RenderGraph1_OutLoop // go back to outer loop
|
||||
|
||||
.align 2
|
||||
RenderGraph1_Addr:
|
||||
.word RenderTextMask
|
||||
|
|
@ -1,173 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_GRAPH2
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" u8* RenderGraph2(u8* dbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render 2-bit palette graphics GF_GRAPH2
|
||||
// R0 ... destination data buffer
|
||||
// R1 ... start X coordinate (must be multiple of 4)
|
||||
// R2 ... start Y coordinate
|
||||
// R3 ... width of this segment (must be multiple of 4)
|
||||
// segm ... video segment
|
||||
// Output new dbuf pointer.
|
||||
// 320 pixels takes 5 us on 151 MHz.
|
||||
|
||||
// RenderGraph2: expands GF_GRAPH2 image bytes into the destination buffer.
// Each source byte is used as an index (*4) into the 32-bit translation table
// at segm->par and the looked-up word (4 output pixels) is stored to dbuf.
// The source row restarts from its base pointer each time the wrap width is reached.
// Returns the advanced destination pointer in R0.
.thumb_func
|
||||
.global RenderGraph2
|
||||
RenderGraph2:
|
||||
|
||||
// push registers (R3 is pushed too, its stack slot [SP+0] is reused for the remaining width)
|
||||
push {r3-r7,lr}
|
||||
|
||||
// Input registers and stack content:
|
||||
// R0 ... destination data buffer
|
||||
// R1 ... start X coordinate
|
||||
// R2 ... start Y coordinate
|
||||
// SP+0: R3 ... width to display (remaining width)
|
||||
// SP+4: R4
|
||||
// SP+8: R5
|
||||
// SP+12: R6
|
||||
// SP+16: R7
|
||||
// SP+20: LR
|
||||
// SP+24: video segment
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#24] // load video segment -> R4
|
||||
|
||||
// get wrap width -> R7
|
||||
ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width
|
||||
movs r6,#3 // mask to align to 32-bit
|
||||
bics r7,r6 // align wrap (clear lowest 2 bits)
|
||||
|
||||
// align X coordinate to 32-bit -> R1
|
||||
bics r1,r6
|
||||
|
||||
// align remaining width -> [SP+0]
|
||||
bics r3,r6
|
||||
str r3,[sp,#0] // save new width
|
||||
|
||||
// base pointer to image data (without X) -> LR, R2
|
||||
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
|
||||
muls r2,r5 // Y * WB -> offset of row in image buffer
|
||||
ldr r5,[r4,#SSEGM_DATA] // pointer to data
|
||||
add r2,r5 // base address of image buffer
|
||||
mov lr,r2 // save pointer to image buffer
|
||||
|
||||
// prepare pointer to image data with X -> R2
|
||||
lsrs r6,r1,#2 // convert X to byte index (1 source byte is 4 pixels wide)
|
||||
add r2,r6 // add index, pointer to source image buffer -> R2
|
||||
|
||||
// prepare pointer to palette translation table -> R3
|
||||
ldr r3,[r4,#SSEGM_PAR] // get pointer to palette translation table -> R3
|
||||
|
||||
// prepare wrap width - start X -> R6
|
||||
subs r6,r7,r1 // pixels remaining to end of segment
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// Outer loop variables (* prepared before outer loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... number of 4-pixels to generate in one part of segment
|
||||
// R2 ... *pointer to source image buffer
|
||||
// R3 ... *pointer to palette translation table
|
||||
// R4 ... (temporary)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... *part width
|
||||
// R7 ... *wrap width
|
||||
// LR ... *base pointer to image data (without X)
|
||||
// [SP+0] ... *width to display
|
||||
|
||||
RenderGraph2_OutLoop:
|
||||
|
||||
// limit part width by remaining width -> R6
|
||||
ldr r4,[sp,#0] // get remaining width
|
||||
cmp r6,r4 // compare part width with remaining width
|
||||
bls 2f // width is OK
|
||||
mov r6,r4 // limit part width
|
||||
|
||||
// check number of pixels
|
||||
2: cmp r6,#4 // check number of remaining pixels
|
||||
bhs 5f // at least one 4-pixel group remains
|
||||
|
||||
// pop registers and return (R0 = new destination pointer)
|
||||
pop {r3-r7,pc}
|
||||
|
||||
// ---- prepare to render whole 4-pixel groups
|
||||
|
||||
// prepare number of 4-pixels to render -> R1
|
||||
5: lsrs r1,r6,#2 // shift to get number of 4-pixels
|
||||
lsls r6,r1,#2 // shift back to get number of pixels, rounded down -> R6
|
||||
subs r4,r6 // get remaining width
|
||||
str r4,[sp,#0] // save new remaining width
|
||||
|
||||
// ---- generate odd 4-pixel group
|
||||
|
||||
// [2,3] check odd 4-pixel group
|
||||
lsrs r1,#1 // [1] halve counter -> number of 8-pixel pairs, carry = odd 4-pixel group
|
||||
bcc RenderGraph2_InLoop // [1,2] no odd 4-pixel group
|
||||
|
||||
// [3] load image sample -> R4
|
||||
ldrb r4,[r2,#0] // [2] load image sample
|
||||
adds r2,#1 // [1] increase pointer to image data
|
||||
|
||||
// [5] write 4 pixels
|
||||
lsls r4,#2 // [1] index*4
|
||||
ldr r5,[r3,r4] // [2] load colors
|
||||
stmia r0!,{r5} // [2] write pixels
|
||||
|
||||
// [2,3] check end of data
|
||||
tst r1,r1 // [1] check counter
|
||||
beq RenderGraph2_EndLoop // [1,2] end
|
||||
|
||||
// ---- [17*N-1] start inner loop, render pixels in one part of segment
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *number of 8-pixel pairs to generate (loop counter, 2 source bytes per pass)
|
||||
// R2 ... *pointer to source image buffer
|
||||
// R3 ... *pointer to palette translation table
|
||||
// R4 ... image sample
|
||||
// R5 ... output pixels
|
||||
// R6 ... output pixels
|
||||
// R7 ... *wrap width
|
||||
// LR ... *base pointer to image data (without X)
|
||||
|
||||
RenderGraph2_InLoop:
|
||||
|
||||
// [2] load image sample -> R4
|
||||
ldrb r4,[r2,#0] // [2] load image sample
|
||||
|
||||
// [3] prepare 4 pixels
|
||||
lsls r4,#2 // [1] index*4
|
||||
ldr r5,[r3,r4] // [2] load colors
|
||||
|
||||
// [3] load image sample -> R4
|
||||
ldrb r4,[r2,#1] // [2] load image sample
|
||||
adds r2,#2 // [1] increase pointer to image data
|
||||
|
||||
// [6] prepare and write next 4 pixels
|
||||
lsls r4,#2 // [1] index*4
|
||||
ldr r6,[r3,r4] // [2] load colors
|
||||
stmia r0!,{r5,r6} // [3] write pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r1,#1 // [1] loop counter
|
||||
bne RenderGraph2_InLoop // [1,2] next step
|
||||
|
||||
// ---- end inner loop, start new part
|
||||
|
||||
RenderGraph2_EndLoop:
|
||||
|
||||
// continue to outer loop (next part starts at X = 0, so a full wrap width is available)
|
||||
mov r6,r7 // load wrap width -> R6
|
||||
mov r2,lr // get base pointer to image data -> R2
|
||||
b RenderGraph2_OutLoop // go back to outer loop
|
||||
|
|
@ -1,214 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_GRAPH4
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" u8* RenderGraph4(u8* dbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render 4-bit palette graphics GF_GRAPH4
|
||||
// R0 ... destination data buffer
|
||||
// R1 ... start X coordinate (must be multiple of 4)
|
||||
// R2 ... start Y coordinate
|
||||
// R3 ... width of this segment (must be multiple of 4)
|
||||
// segm ... video segment
|
||||
// Output new dbuf pointer.
|
||||
// 320 pixels takes 8.8 us on 151 MHz.
|
||||
|
||||
// RenderGraph4: expands GF_GRAPH4 image bytes into the destination buffer.
// Each source byte is used as an index (*2) into the 16-bit translation table
// at segm->par; two looked-up halfwords are combined into one 32-bit word (4 output pixels).
// The source row restarts from its base pointer each time the wrap width is reached.
// Returns the advanced destination pointer in R0.
.thumb_func
|
||||
.global RenderGraph4
|
||||
RenderGraph4:
|
||||
|
||||
// push registers (R3 is pushed too, its stack slot [SP+0] is reused for the remaining width)
|
||||
push {r3-r7,lr}
|
||||
|
||||
// Input registers and stack content:
|
||||
// R0 ... destination data buffer
|
||||
// R1 ... start X coordinate
|
||||
// R2 ... start Y coordinate
|
||||
// SP+0: R3 ... width to display (remaining width)
|
||||
// SP+4: R4
|
||||
// SP+8: R5
|
||||
// SP+12: R6
|
||||
// SP+16: R7
|
||||
// SP+20: LR
|
||||
// SP+24: video segment (later: wrap width in X direction)
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#24] // load video segment -> R4
|
||||
|
||||
// get wrap width -> [SP+24] (kept on stack because R7 is needed as temporary in the inner loop)
|
||||
ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width
|
||||
movs r6,#3 // mask to align to 32-bit
|
||||
bics r7,r6 // align wrap (clear lowest 2 bits)
|
||||
str r7,[sp,#24] // save wrap width (overwrites the segm argument slot)
|
||||
|
||||
// align X coordinate to 32-bit -> R1
|
||||
bics r1,r6
|
||||
|
||||
// align remaining width -> [SP+0]
|
||||
bics r3,r6
|
||||
str r3,[sp,#0] // save new width
|
||||
|
||||
// base pointer to image data (without X) -> LR, R2
|
||||
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
|
||||
muls r2,r5 // Y * WB -> offset of row in image buffer
|
||||
ldr r5,[r4,#SSEGM_DATA] // pointer to data
|
||||
add r2,r5 // base address of image buffer
|
||||
mov lr,r2 // save pointer to image buffer
|
||||
|
||||
// prepare pointer to image data with X -> R2
|
||||
lsrs r6,r1,#1 // convert X to byte index (1 source byte is 2 pixels wide)
|
||||
add r2,r6 // add index, pointer to source image buffer -> R2
|
||||
|
||||
// prepare pointer to palette translation table -> R3
|
||||
ldr r3,[r4,#SSEGM_PAR] // get pointer to palette translation table -> R3
|
||||
|
||||
// prepare wrap width - start X -> R6
|
||||
ldr r6,[sp,#24] // load wrap width
|
||||
subs r6,r1 // pixels remaining to end of segment
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// Outer loop variables (* prepared before outer loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... number of 4-pixels to generate in one part of segment
|
||||
// R2 ... *pointer to source image buffer
|
||||
// R3 ... *pointer to palette translation table
|
||||
// R4 ... (temporary)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... *part width
|
||||
// R7 ... (temporary)
|
||||
// LR ... *base pointer to image data (without X)
|
||||
// [SP+0] ... *width to display
|
||||
// [SP+24] ... *wrap width
|
||||
|
||||
RenderGraph4_OutLoop:
|
||||
|
||||
// limit part width by remaining width -> R6
|
||||
ldr r4,[sp,#0] // get remaining width
|
||||
cmp r6,r4 // compare part width with remaining width
|
||||
bls 2f // width is OK
|
||||
mov r6,r4 // limit part width
|
||||
|
||||
// check number of pixels
|
||||
2: cmp r6,#4 // check number of remaining pixels
|
||||
bhs 5f // at least one 4-pixel group remains
|
||||
|
||||
// pop registers and return (R0 = new destination pointer)
|
||||
pop {r3-r7,pc}
|
||||
|
||||
// ---- prepare to render whole 4-pixel groups
|
||||
|
||||
// prepare number of 4-pixels to render -> R1
|
||||
5: lsrs r1,r6,#2 // shift to get number of 4-pixels
|
||||
lsls r6,r1,#2 // shift back to get number of pixels, rounded down -> R6
|
||||
subs r4,r6 // get remaining width
|
||||
str r4,[sp,#0] // save new remaining width
|
||||
|
||||
// ---- generate odd 4-pixel group
|
||||
|
||||
// [2,3] check odd 4-pixel group
|
||||
lsrs r1,#1 // [1] halve counter -> number of 8-pixel pairs, carry = odd 4-pixel group
|
||||
bcc RenderGraph4_InLoop // [1,2] no odd 4-pixel group
|
||||
|
||||
// [2] load image sample -> R4
|
||||
ldrb r4,[r2,#0] // [2] load image sample
|
||||
|
||||
// [3] prepare 1st and 2nd pixel -> R5
|
||||
lsls r4,#1 // [1] index*2
|
||||
ldrh r5,[r3,r4] // [2] load 2 pixels
|
||||
|
||||
// [3] load image sample -> R4
|
||||
ldrb r4,[r2,#1] // [2] load image sample
|
||||
adds r2,#2 // [1] increase pointer to image data
|
||||
|
||||
// [3] prepare 3rd and 4th pixel -> R6
|
||||
lsls r4,#1 // [1] index*2
|
||||
ldrh r6,[r3,r4] // [2] load 2 pixels
|
||||
|
||||
// [2] compose pixels -> R5
|
||||
lsls r6,#16 // [1] shift 3rd and 4th pixels
|
||||
orrs r5,r6 // [1] compose pixels
|
||||
|
||||
// [2] write pixels
|
||||
stmia r0!,{r5} // [2] write 4 pixels
|
||||
|
||||
// [2,3] check end of data
|
||||
tst r1,r1 // [1] check counter
|
||||
beq RenderGraph4_EndLoop // [1,2] end
|
||||
|
||||
// ---- [31*N-1] start inner loop, render pixels in one part of segment
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *number of 8-pixel pairs to generate (loop counter, 4 source bytes per pass)
|
||||
// R2 ... *pointer to source image buffer
|
||||
// R3 ... *pointer to palette translation table
|
||||
// R4 ... image sample
|
||||
// R5 ... output pixels
|
||||
// R6 ... output pixels
|
||||
// R7 ... output pixels
|
||||
// LR ... *base pointer to image data (without X)
|
||||
// [SP+24] ... wrap width
|
||||
|
||||
RenderGraph4_InLoop:
|
||||
|
||||
// [2] load image sample -> R4
|
||||
ldrb r4,[r2,#0] // [2] load image sample
|
||||
|
||||
// [3] prepare 1st and 2nd pixel -> R5
|
||||
lsls r4,#1 // [1] index*2
|
||||
ldrh r5,[r3,r4] // [2] load 2 pixels
|
||||
|
||||
// [2] load image sample -> R4
|
||||
ldrb r4,[r2,#1] // [2] load image sample
|
||||
|
||||
// [3] prepare 3rd and 4th pixel -> R6
|
||||
lsls r4,#1 // [1] index*2
|
||||
ldrh r6,[r3,r4] // [2] load 2 pixels
|
||||
|
||||
// [2] compose pixels -> R5
|
||||
lsls r6,#16 // [1] shift 3rd and 4th pixels
|
||||
orrs r5,r6 // [1] compose pixels
|
||||
|
||||
// [2] load image sample -> R4
|
||||
ldrb r4,[r2,#2] // [2] load image sample
|
||||
|
||||
// [3] prepare 5th and 6th pixel -> R6
|
||||
lsls r4,#1 // [1] index*2
|
||||
ldrh r6,[r3,r4] // [2] load 2 pixels
|
||||
|
||||
// [3] load image sample -> R4
|
||||
ldrb r4,[r2,#3] // [2] load image sample
|
||||
adds r2,#4 // [1] increase pointer to image data
|
||||
|
||||
// [3] prepare 7th and 8th pixel -> R7
|
||||
lsls r4,#1 // [1] index*2
|
||||
ldrh r7,[r3,r4] // [2] load 2 pixels
|
||||
|
||||
// [2] compose pixels -> R6
|
||||
lsls r7,#16 // [1] shift 7th and 8th pixels
|
||||
orrs r6,r7 // [1] compose pixels
|
||||
|
||||
// [3] write pixels
|
||||
stmia r0!,{r5,r6} // [3] write 8 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r1,#1 // [1] loop counter
|
||||
bne RenderGraph4_InLoop // [1,2] next step
|
||||
|
||||
// ---- end inner loop, start new part
|
||||
|
||||
RenderGraph4_EndLoop:
|
||||
|
||||
// continue to outer loop (next part starts at X = 0, so a full wrap width is available)
|
||||
ldr r6,[sp,#24] // load wrap width -> R6
|
||||
mov r2,lr // get base pointer to image data -> R2
|
||||
b RenderGraph4_OutLoop // go back to outer loop
|
||||
|
|
@ -1,134 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_GRAPH8
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" u32* RenderGrad1(u32* cbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render gradient with 1 line GF_GRAD1
|
||||
// R0 ... pointer to control buffer
|
||||
// R1 ... start X coordinate (in pixels, must be multiple of 4)
|
||||
// R2 ... start Y coordinate (in graphics lines), will be ignored and substituted with 0
|
||||
// R3 ... width to display (must be multiple of 4 and > 0)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to control buffer.
|
||||
// 320 pixels takes 0.45 us on 151 MHz.
|
||||
|
||||
// RenderGrad1: entry point that forces the Y coordinate to 0.
// There is no return or branch here: execution falls through into the
// code that follows this entry (RenderGrad2, then RenderGraph8).
.thumb_func
|
||||
.global RenderGrad1
|
||||
RenderGrad1:
|
||||
movs r2,#0 // Y = 0 (the passed Y coordinate is ignored), then fall through
|
||||
|
||||
|
||||
// extern "C" u32* RenderGrad2(u32* cbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render gradient with 2 lines GF_GRAD2
|
||||
// R0 ... pointer to control buffer
|
||||
// R1 ... start X coordinate (in pixels, must be multiple of 4)
|
||||
// R2 ... start Y coordinate (in graphics lines), will be masked to values 0 and 1
|
||||
// R3 ... width to display (must be multiple of 4 and > 0)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to control buffer.
|
||||
// 320 pixels takes 0.45 us on 151 MHz.
|
||||
|
||||
// RenderGrad2: entry point that reduces the Y coordinate to its lowest bit (0 or 1).
// There is no return or branch here: execution falls through into RenderGraph8,
// which follows this entry.
.thumb_func
|
||||
.global RenderGrad2
|
||||
RenderGrad2:
|
||||
lsls r2,#31 // move bit 0 of Y to bit 31, discarding all higher bits
|
||||
lsrs r2,#31 // move it back -> R2 = Y & 1, then fall through
|
||||
|
||||
|
||||
// extern "C" u32* RenderGraph8(u32* cbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render native 8-bit graphics GF_GRAPH8
|
||||
// R0 ... pointer to control buffer
|
||||
// R1 ... start X coordinate (in pixels, must be multiple of 4)
|
||||
// R2 ... start Y coordinate (in graphics lines)
|
||||
// R3 ... width to display (must be multiple of 4 and > 0)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to control buffer.
|
||||
// 320 pixels takes 0.45 us on 151 MHz.
|
||||
|
||||
// RenderGraph8: does not copy pixels; for each part of the line it appends a
// two-word entry {width/4, pointer to source data} to the control buffer in R0.
// A new entry is started from the row base pointer each time the wrap width is reached.
// Returns the advanced control buffer pointer in R0.
.thumb_func
|
||||
.global RenderGraph8
|
||||
RenderGraph8:
|
||||
|
||||
// push registers
|
||||
push {r4-r7,lr}
|
||||
|
||||
// Stack content:
|
||||
// SP+0: R4
|
||||
// SP+4: R5
|
||||
// SP+8: R6
|
||||
// SP+12: R7
|
||||
// SP+16: LR
|
||||
// SP+20: video segment
|
||||
|
||||
// Variables:
|
||||
// R0 ... pointer to control buffer
|
||||
// R1 ... X coordinate, later: width of one segment
|
||||
// R2 ... Y coordinate, later: current pointer to data buffer
|
||||
// R3 ... remaining width
|
||||
// R4 ... base pointer to data buffer
|
||||
// R5 ... (temporary)
|
||||
// R6 ... (temporary)
|
||||
// R7 ... wrap width
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#20] // load video segment -> R4
|
||||
|
||||
// get wrap width -> R7
|
||||
ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width
|
||||
movs r6,#3 // mask to align to 32-bit
|
||||
bics r7,r6 // align wrap (clear lowest 2 bits)
|
||||
|
||||
// align X coordinate to 32-bit -> R1
|
||||
bics r1,r6
|
||||
|
||||
// align remaining width -> R3
|
||||
bics r3,r6
|
||||
|
||||
// base pointer to data buffer (without X) -> R4
|
||||
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
|
||||
muls r2,r5 // Y * WB -> offset in data buffer
|
||||
ldr r5,[r4,#SSEGM_DATA] // pointer to data
|
||||
adds r4,r2,r5 // base address of data buffer -> R4
|
||||
|
||||
// prepare current pointer to image data with X -> R2
|
||||
adds r2,r4,r1 // pointer to source data buffer -> R2 (1 byte per pixel)
|
||||
|
||||
// prepare wrap width - start X -> R1
|
||||
subs r1,r7,r1 // pixels remaining to end of segment
|
||||
|
||||
// check remaining width
|
||||
// NOTE(review): if the aligned wrap width (R7) is 0 and X is 0, R1 stays 0, R3 never
|
||||
// decreases and this loop does not terminate - presumably callers guarantee wrapx >= 4; confirm.
|
||||
2: tst r3,r3 // check remaining width
|
||||
beq 6f // end of data
|
||||
|
||||
// limit part width by remaining width -> R1
|
||||
cmp r1,r3 // compare part width with remaining width
|
||||
bls 4f // width is OK
|
||||
mov r1,r3 // limit part width
|
||||
|
||||
// decrease remaining width
|
||||
4: subs r3,r1 // subtract from remaining width
|
||||
|
||||
// save control block
|
||||
lsrs r1,#2 // width / 4
|
||||
stm r0!,{r1,r2} // save width/4 and data pointer to control buffer
|
||||
|
||||
// continue to next loop (next part starts at X = 0, so a full wrap width is available)
|
||||
mov r1,r7 // load wrap width -> R1
|
||||
mov r2,r4 // get base pointer to data buffer -> R2
|
||||
b 2b // go next loop
|
||||
|
||||
// pop registers and return (R0 = new control buffer pointer)
|
||||
6: pop {r4-r7,pc}
|
||||
|
|
@ -1,310 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_GRAPH8MAT
|
||||
//
|
||||
// ****************************************************************************
|
||||
// data ... image data
|
||||
// par ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL))
|
||||
// par2 ... LOW=number of bits of image width, HIGH=number of bits of image height
|
||||
// image width must be max. 4096 (= 1<<FRACT); image width and height must be powers of 2
|
||||
// wrapy ... segment height
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
// Byte offsets of the interpolator registers, relative to the interpolator base
// address that the render routine loads from RenderGraph8Mat_Interp (ACCUM0 is at offset 0).
#define ACCUM0_OFFSET 0
|
||||
#define ACCUM1_OFFSET 4
|
||||
#define BASE0_OFFSET 8
|
||||
#define BASE1_OFFSET 12
|
||||
#define BASE2_OFFSET 16
|
||||
#define POP_LANE0_OFFSET 20
|
||||
#define POP_LANE1_OFFSET 24
|
||||
#define POP_FULL_OFFSET 28
|
||||
#define PEEK_LANE0_OFFSET 32
|
||||
#define PEEK_LANE1_OFFSET 36
|
||||
#define PEEK_FULL_OFFSET 40
|
||||
#define CTRL_LANE0_OFFSET 44
|
||||
#define CTRL_LANE1_OFFSET 48
|
||||
#define ACCUM0_ADD_OFFSET 52
|
||||
#define ACCUM1_ADD_OFFSET 56
|
||||
#define BASE_1AND0_OFFSET 60
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" u32* RenderGraph8Mat(u32* cbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render 8-bit graphics GF_GRAPH8MAT, with 2D matrix transformation,
|
||||
// using hardware interpolator interp1 (interp1 state is not saved during interrupt)
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate (not used)
|
||||
// R2 ... start Y coordinate (in graphics lines)
|
||||
// R3 ... width to display (must be multiple of 4)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to data buffer.
|
||||
|
||||
// RenderGraph8Mat: fills the destination buffer with 8-bit pixels of a matrix-transformed image.
// The interpolator is programmed with the image base (base2), the per-pixel steps
// m11/m21 (base0/base1) and the start position (accum0/accum1); every read of
// POP_FULL then yields the address of the next source pixel, which is loaded as a byte.
// Four pixels are packed into each stored 32-bit word. Returns the advanced pointer in R0.
.thumb_func
|
||||
.global RenderGraph8Mat
|
||||
RenderGraph8Mat:
|
||||
|
||||
// Input registers and stack:
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... X coordinate (not used)
|
||||
// R2 ... Y coordinate
|
||||
// R3 ... remaining width
|
||||
// SP+0: R4
|
||||
// SP+4: R5
|
||||
// SP+8: R6
|
||||
// SP+12: R7
|
||||
// SP+16: LR
|
||||
// SP+20: video segment
|
||||
|
||||
// push registers
|
||||
push {r4-r7,lr}
|
||||
|
||||
// ---- prepare registers
|
||||
|
||||
// prepare start coordinate X0 = -w/2 -> LR
|
||||
lsrs r1,r3,#1 // width/2
|
||||
negs r1,r1 // negate
|
||||
mov lr,r1 // store start coordinate X0 -> LR
|
||||
|
||||
// prepare number of 4-pixels (loop counter) -> R7
|
||||
lsrs r7,r3,#2 // width/4 -> R7
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#20] // load video segment -> R4
|
||||
|
||||
// prepare current coordinate Y0 = -h/2 + y -> R12
|
||||
ldrh r1,[r4,#SSEGM_WRAPY] // get segment height -> R1
|
||||
lsrs r1,#1 // height/2
|
||||
negs r1,r1 // negate
|
||||
adds r1,r2 // add current Y coordinate
|
||||
mov r12,r1 // store current coordinate Y0 -> R12 (R12 is used as scratch storage)
|
||||
|
||||
// get number of bits of image width "xbits" -> R1
|
||||
ldrh r1,[r4,#SSEGM_PAR2] // number of bits of image width -> R1
|
||||
|
||||
// get number of bits of image height "ybits" -> R2
|
||||
ldrh r2,[r4,#SSEGM_PAR2+2] // number of bits of image height -> R2
|
||||
|
||||
// prepare address of interpolator base -> R3 (the width in R3 is no longer needed)
|
||||
ldr r3,RenderGraph8Mat_Interp // get address of interpolator base -> R3
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R1 ... number of bits of image width xbits
|
||||
// R2 ... number of bits of image height ybits
|
||||
// R3 ... interpolator base
|
||||
// R4 ... video segment
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// ---- setup interpolator
|
||||
|
||||
// set image base to base2
|
||||
ldr r6,[r4,#SSEGM_DATA] // load image base
|
||||
str r6,[r3,#BASE2_OFFSET] // set image base
|
||||
|
||||
// set control word of lane 1 - add raw lane base back to accumulator, shift "FRACT-xbits", mask xbits...xbits+ybits-1
|
||||
ldr r6,RenderGraph8Mat_Ctrl // load control word
|
||||
subs r6,r1 // FRACT - xbits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required)
|
||||
lsls r5,r1,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift xbits to mask LSB position -> R5
|
||||
orrs r6,r5 // add xbits to control word
|
||||
subs r1,#1 // xbits - 1 -> R1
|
||||
adds r5,r1,r2 // xbits-1+ybits -> R5
|
||||
lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to MSB mask position
|
||||
orrs r6,r5 // add to control word
|
||||
str r6,[r3,#CTRL_LANE1_OFFSET] // set control word of lane 1
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R1 ... image width xbits-1
|
||||
// R3 ... interpolator base
|
||||
// R4 ... video segment
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// set control word of lane 0 - add raw lane base back to accumulator, shift "FRACT", mask 0..xbits-1
|
||||
ldr r6,RenderGraph8Mat_Ctrl // load control word
|
||||
lsls r1,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift xbits-1 to mask MSB position
|
||||
orrs r6,r1 // add to control word
|
||||
str r6,[r3,#CTRL_LANE0_OFFSET] // set control word of lane 0
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... video segment
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// ---- set matrix
|
||||
|
||||
// get pointer to matrix -> R4
|
||||
ldr r4,[r4,#SSEGM_PAR] // get pointer to matrix -> R4
|
||||
|
||||
// r4+0 ... m11
|
||||
// r4+4 ... m12
|
||||
// r4+8 ... m13
|
||||
// r4+12 ... m21
|
||||
// r4+16 ... m22
|
||||
// r4+20 ... m23
|
||||
|
||||
// set m11 -> R5 base0
|
||||
ldr r5,[r4,#0] // load m11
|
||||
str r5,[r3,#BASE0_OFFSET] // set base0
|
||||
|
||||
// set m21 -> R6 base1
|
||||
ldr r6,[r4,#12] // load m21
|
||||
str r6,[r3,#BASE1_OFFSET] // set base1
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R5 ... m11
|
||||
// R6 ... m21
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// set x0*m11 + y0*m12 + m13 -> accum0
|
||||
mov r2,lr // start coordinate X0 -> R2
|
||||
muls r5,r2 // x0*m11 -> R5
|
||||
muls r2,r6 // x0*m21 -> R2
|
||||
ldr r1,[r4,#4] // load m12 -> R1
|
||||
mov r6,r12 // load coordinate Y0 -> R6
|
||||
muls r1,r6 // y0*m12 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 -> R5
|
||||
ldr r1,[r4,#8] // load m13 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
|
||||
str r5,[r3,#ACCUM0_OFFSET] // set accum0
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R2 ... x0*m21
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R6 ... current coordinate Y0
|
||||
// R7 ... width/4
|
||||
|
||||
// set x0*m21 + y0*m22 + m23 -> accum1
|
||||
ldr r1,[r4,#16] // load m22 -> R1
|
||||
muls r1,r6 // y0*m22 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 -> R2
|
||||
ldr r1,[r4,#20] // load m23 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
|
||||
str r2,[r3,#ACCUM1_OFFSET] // set accum1
|
||||
|
||||
// ---- process odd 4-pixel
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to pixel)
|
||||
// R5 ... (temporary - load pixel)
|
||||
// R6 ... (temporary - pixel accumulator)
|
||||
// R7 ... width/4 (loop counter)
|
||||
|
||||
lsrs r7,#1 // width/4/2 -> number of 8-pixel groups, carry = odd 4-pixel
|
||||
bcc 2f // no odd 4-pixel
|
||||
|
||||
// [3] load 1st pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value (address of next source pixel)
|
||||
ldrb r6,[r4,#0] // [2] load pixel
|
||||
|
||||
// [5] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r5,[r4,#0] // [2] load pixel
|
||||
lsls r5,#8 // [1] shift 1 byte left
|
||||
orrs r6,r5 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 3rd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r5,[r4,#0] // [2] load pixel
|
||||
lsls r5,#16 // [1] shift 2 bytes left
|
||||
orrs r6,r5 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 4th pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r5,[r4,#0] // [2] load pixel
|
||||
lsls r5,#24 // [1] shift 3 bytes left
|
||||
orrs r6,r5 // [1] add pixel to accumulator
|
||||
|
||||
// [2] store 4 pixels
|
||||
stmia r0!,{r6} // [2] store 4 pixels
|
||||
|
||||
// check number of remaining pixels
|
||||
2: tst r7,r7 // check number of 8-pixel groups
|
||||
beq 8f // end
|
||||
|
||||
// ---- [42 per 8 pixels] inner loop
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... (temporary - pixel accumulator 1)
|
||||
// R2 ... (temporary - pixel accumulator 2)
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to pixel, load pixel)
|
||||
// R7 ... width/8 (loop counter)
|
||||
|
||||
// [3] load 1st pixel
|
||||
6: ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value (address of next source pixel)
|
||||
ldrb r1,[r4,#0] // [2] load pixel
|
||||
|
||||
// [5] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 3rd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 4th pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#24 // [1] shift 3 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [3] load 5th pixel (1st pixel of second word)
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r2,[r4,#0] // [2] load pixel
|
||||
|
||||
// [5] load 6th pixel (2nd pixel of second word)
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 7th pixel (3rd pixel of second word)
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 8th pixel (4th pixel of second word)
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#24 // [1] shift 3 bytes left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [3] store 8 pixels
|
||||
stmia r0!,{r1,r2} // [3] store 8 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r7,#1 // [1] 8-pixel counter
|
||||
bne 6b // [1,2] next 8-pixels
|
||||
|
||||
// pop registers and return (R0 = new destination pointer)
|
||||
8: pop {r4-r7,pc}
|
||||
|
||||
.align 2
|
||||
// pointer to Interp1 base
|
||||
RenderGraph8Mat_Interp:
|
||||
.word SIO_BASE+SIO_INTERP1_ACCUM0_OFFSET // address of interpolator base
|
||||
|
||||
RenderGraph8Mat_Ctrl: // lane control word
|
||||
.word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP0_CTRL_LANE0_SHIFT_LSB)
|
||||
|
|
@ -1,340 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_GRAPH8PERSP
|
||||
//
|
||||
// ****************************************************************************
|
||||
// data ... image data
|
||||
// par ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL))
|
||||
// par2 ... LOW=number of bits of image width, HIGH=number of bits of image height
|
||||
// image width must be max. 4096 (= 1<<FRACT); image width and height must be a power of 2
|
||||
// par3 ... horizon offset
|
||||
// wrapy ... segment height
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
#define ACCUM0_OFFSET 0
|
||||
#define ACCUM1_OFFSET 4
|
||||
#define BASE0_OFFSET 8
|
||||
#define BASE1_OFFSET 12
|
||||
#define BASE2_OFFSET 16
|
||||
#define POP_LANE0_OFFSET 20
|
||||
#define POP_LANE1_OFFSET 24
|
||||
#define POP_FULL_OFFSET 28
|
||||
#define PEEK_LANE0_OFFSET 32
|
||||
#define PEEK_LANE1_OFFSET 36
|
||||
#define PEEK_FULL_OFFSET 40
|
||||
#define CTRL_LANE0_OFFSET 44
|
||||
#define CTRL_LANE1_OFFSET 48
|
||||
#define ACCUM0_ADD_OFFSET 52
|
||||
#define ACCUM1_ADD_OFFSET 56
|
||||
#define BASE_1AND0_OFFSET 60
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" u32* RenderGraph8Persp(u32* cbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render 8-bit graphics GF_GRAPH8PERSP, with 2D matrix transformation,
|
||||
// using hardware interpolator interp1 (interp1 state is not saved during interrupt)
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate (not used)
|
||||
// R2 ... start Y coordinate (in graphics lines)
|
||||
// R3 ... width to display (must be multiple of 4)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to data buffer.
|
||||
// 320 pixels takes ?? us on 151 MHz.
|
||||
|
||||
// RenderGraph8Persp: writes "width" 8-bit pixels to the destination buffer (R0),
// fetching each pixel through the address produced by the interpolator at
// RenderGraph8Persp_Interp. Per call it computes dist = FRACTMUL*h/(y + horiz + 1)
// with the SIO divider and scales m11, m21, m12 and m22 by it; m13 and m23 are
// added unscaled. R0 is advanced by every store and is left as the return value.
.thumb_func
|
||||
.global RenderGraph8Persp
|
||||
RenderGraph8Persp:
|
||||
|
||||
// Input registers and stack (stack offsets are valid after the push below):
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... X coordinate (not used, overwritten below)
|
||||
// R2 ... Y coordinate
|
||||
// R3 ... remaining width
|
||||
// SP+0: R4
|
||||
// SP+4: R5
|
||||
// SP+8: R6
|
||||
// SP+12: R7
|
||||
// SP+16: LR
|
||||
// SP+20: video segment
|
||||
|
||||
// push registers
|
||||
push {r4-r7,lr}
|
||||
|
||||
// ---- prepare registers
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#20] // load video segment -> R4
|
||||
|
||||
// prepare current coordinate Y0 = y - h -> R12
|
||||
ldrh r1,[r4,#SSEGM_WRAPY] // get segment height h -> R1
|
||||
subs r2,r1 // y - h = current coordinate Y0
|
||||
mov r12,r2 // store current coordinate Y0 -> R12
|
||||
|
||||
// start calculating distance coefficient dist = FRACTMUL*h/(y + horiz + 1),
// where y is the Y coordinate passed in R2 (the subtraction of h is undone below).
// The divide runs in the SIO divider while the following setup code executes.
|
||||
lsls r6,r1,#FRACT // segment height * FRACTMUL -> R6
|
||||
ldr r5,RenderGraph8Persp_pSioBase // get address of SIO base -> R5
|
||||
str r6,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h
|
||||
ldrh r6,[r4,#SSEGM_PAR3] // horizon offset -> R6
|
||||
adds r2,r1 // (y - h) + h = input Y coordinate y -> R2
|
||||
adds r6,r2 // horizon + y -> R6
|
||||
adds r6,#1 // horizon + y + 1 -> R6
|
||||
str r6,[r5,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz + 1 (unsigned divide)
|
||||
|
||||
// prepare start coordinate X0 = -w/2 -> LR
|
||||
lsrs r5,r3,#1 // width/2
|
||||
negs r5,r5 // negate
|
||||
mov lr,r5 // store start coordinate X0 -> LR
|
||||
|
||||
// prepare number of 4-pixels (loop counter) -> R7
|
||||
lsrs r7,r3,#2 // width/4 -> R7
|
||||
|
||||
// get number of bits of image width "xbits" -> R1
|
||||
ldrh r1,[r4,#SSEGM_PAR2] // number of bits of image width -> R1
|
||||
|
||||
// get number of bits of image height "ybits" -> R2
|
||||
ldrh r2,[r4,#SSEGM_PAR2+2] // number of bits of image height -> R2
|
||||
|
||||
// prepare address of interpolator base -> R3
|
||||
ldr r3,RenderGraph8Persp_Interp // get address of interpolator base -> R3
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R1 ... number of bits of image width xbits
|
||||
// R2 ... number of bits of image height ybits
|
||||
// R3 ... interpolator base
|
||||
// R4 ... video segment
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// ---- setup interpolator
|
||||
|
||||
// set image base to base2
|
||||
ldr r6,[r4,#SSEGM_DATA] // load image base
|
||||
str r6,[r3,#BASE2_OFFSET] // set image base
|
||||
|
||||
// set control word of lane 1 - add raw lane base back to accumulator, shift "FRACT-xbits", mask xbits...xbits+ybits-1
|
||||
ldr r6,RenderGraph8Persp_Ctrl // load control word (ADD_RAW, shift = FRACT)
|
||||
subs r6,r1 // FRACT - xbits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required)
|
||||
lsls r5,r1,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift xbits to mask LSB position -> R5
|
||||
orrs r6,r5 // add xbits to control word
|
||||
subs r1,#1 // xbits - 1 -> R1
|
||||
adds r5,r1,r2 // xbits-1+ybits -> R5
|
||||
lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to MSB mask position
|
||||
orrs r6,r5 // add to control word
|
||||
str r6,[r3,#CTRL_LANE1_OFFSET] // set control word of lane 1
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R1 ... image width xbits-1
|
||||
// R3 ... interpolator base
|
||||
// R4 ... video segment
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// set control word of lane 0 - add raw lane base back to accumulator, shift "FRACT", mask 0..xbits-1
|
||||
ldr r6,RenderGraph8Persp_Ctrl // load control word (ADD_RAW, shift = FRACT)
|
||||
lsls r1,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift xbits-1 to mask MSB position
|
||||
orrs r6,r1 // add to control word
|
||||
str r6,[r3,#CTRL_LANE0_OFFSET] // set control word of lane 0
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... video segment
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// ---- set matrix
|
||||
|
||||
// get pointer to matrix -> R4
|
||||
ldr r4,[r4,#SSEGM_PAR] // get pointer to matrix -> R4
|
||||
|
||||
// get distance coefficient dist -> R1
// NOTE(review): no explicit wait here; this relies on the setup code since the
// divisor write having given the divider enough cycles to finish - confirm.
|
||||
ldr r1,RenderGraph8Persp_pSioBase // get address of SIO base -> R1
|
||||
ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient -> R1, distance coefficient
|
||||
|
||||
// r4+0 ... m11
|
||||
// r4+4 ... m12
|
||||
// r4+8 ... m13
|
||||
// r4+12 ... m21
|
||||
// r4+16 ... m22
|
||||
// r4+20 ... m23
|
||||
|
||||
// set scaled m11 -> R5, base0
|
||||
ldr r5,[r4,#0] // load m11
|
||||
muls r5,r1 // m11*dist
|
||||
asrs r5,#FRACT // (m11*dist)>>FRACT
|
||||
str r5,[r3,#BASE0_OFFSET] // set base0
|
||||
|
||||
// set scaled m21 -> R6, base1
|
||||
ldr r6,[r4,#12] // load m21
|
||||
muls r6,r1 // m21*dist
|
||||
asrs r6,#FRACT // (m21*dist)>>FRACT
|
||||
str r6,[r3,#BASE1_OFFSET] // set base1
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R1 ... distance coefficient
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R5 ... m11 (scaled by dist)
|
||||
// R6 ... m21 (scaled by dist)
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// set x0*m11 + y0*m12 + m13 -> accum0 (m11 and m12 are the dist-scaled values)
|
||||
mov r2,lr // start coordinate X0 -> R2
|
||||
muls r5,r2 // x0*m11 -> R5
|
||||
muls r2,r6 // x0*m21 -> R2
|
||||
mov lr,r1 // save distance coefficient -> LR
|
||||
ldr r6,[r4,#4] // load m12 -> R6
|
||||
muls r1,r6 // m12*dist
|
||||
asrs r1,#FRACT // (m12*dist)>>FRACT
|
||||
mov r6,r12 // load coordinate Y0 -> R6
|
||||
muls r1,r6 // y0*m12 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 -> R5
|
||||
ldr r1,[r4,#8] // load m13 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
|
||||
str r5,[r3,#ACCUM0_OFFSET] // set accum0
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R2 ... x0*m21
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R6 ... current coordinate Y0
|
||||
// R7 ... width/4
|
||||
// LR ... distance coefficient
|
||||
|
||||
// set x0*m21 + y0*m22 + m23 -> accum1 (m21 and m22 are the dist-scaled values)
|
||||
ldr r1,[r4,#16] // load m22 -> R1
|
||||
mov r5,lr // distance coefficient -> R5
|
||||
muls r1,r5 // m22*dist
|
||||
asrs r1,#FRACT // (m22*dist)>>FRACT
|
||||
muls r1,r6 // y0*m22 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 -> R2
|
||||
ldr r1,[r4,#20] // load m23 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
|
||||
str r2,[r3,#ACCUM1_OFFSET] // set accum1
|
||||
|
||||
// ---- process odd 4-pixel
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to pixel)
|
||||
// R5 ... (temporary - load pixel)
|
||||
// R6 ... (temporary - pixel accumulator)
|
||||
// R7 ... width/4 (loop counter)
|
||||
|
||||
// the shift leaves bit 0 of width/4 in the carry flag: carry set = one odd 4-pixel group
lsrs r7,#1 // width/4/2 = number of 8-pixel groups -> R7
|
||||
bcc 2f // no odd 4-pixel
|
||||
|
||||
// [3] load 1st pixel (each POP_FULL read yields the address of the next pixel)
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r6,[r4,#0] // [2] load pixel
|
||||
|
||||
// [5] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r5,[r4,#0] // [2] load pixel
|
||||
lsls r5,#8 // [1] shift 1 byte left
|
||||
orrs r6,r5 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 3rd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r5,[r4,#0] // [2] load pixel
|
||||
lsls r5,#16 // [1] shift 2 bytes left
|
||||
orrs r6,r5 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 4th pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r5,[r4,#0] // [2] load pixel
|
||||
lsls r5,#24 // [1] shift 3 bytes left
|
||||
orrs r6,r5 // [1] add pixel to accumulator
|
||||
|
||||
// [2] store 4 pixels
|
||||
stmia r0!,{r6} // [2] store 4 pixels, advance destination
|
||||
|
||||
// check number of remaining pixels
|
||||
2: tst r7,r7 // check number of 8-pixel groups
|
||||
beq 8f // none left - end
|
||||
|
||||
// ---- [42 per 8 pixels] inner loop
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... (temporary - pixel accumulator 1)
|
||||
// R2 ... (temporary - pixel accumulator 2)
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to pixel, load pixel)
|
||||
// R7 ... width/8 (loop counter)
|
||||
|
||||
// [3] load 1st pixel
|
||||
6: ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r1,[r4,#0] // [2] load pixel
|
||||
|
||||
// [5] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 3rd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 4th pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#24 // [1] shift 3 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [3] load 5th pixel (first pixel of the second word)
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r2,[r4,#0] // [2] load pixel
|
||||
|
||||
// [5] load 6th pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 7th pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 8th pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#24 // [1] shift 3 bytes left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [3] store 8 pixels
|
||||
stmia r0!,{r1,r2} // [3] store 8 pixels, advance destination
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r7,#1 // [1] 8-pixel counter
|
||||
bne 6b // [1,2] next 8-pixels
|
||||
|
||||
// pop registers and return (R0 = pointer past the last stored pixel)
|
||||
8: pop {r4-r7,pc}
|
||||
|
||||
.align 2
|
||||
// pointer to SIO base
|
||||
RenderGraph8Persp_pSioBase:
|
||||
.word SIO_BASE // address of SIO base
|
||||
|
||||
// pointer to Interp1 base
|
||||
RenderGraph8Persp_Interp:
|
||||
.word SIO_BASE+SIO_INTERP1_ACCUM0_OFFSET // address of interpolator base
|
||||
|
||||
// lane control word; NOTE(review): built from INTERP0 bit definitions but written to
// the interp1 lanes - presumably both interpolators share the CTRL layout, confirm.
RenderGraph8Persp_Ctrl: // lane control word
|
||||
.word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP0_CTRL_LANE0_SHIFT_LSB)
|
||||
|
|
@ -1,317 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_GTEXT
|
||||
//
|
||||
// ****************************************************************************
|
||||
// u32 par SSEGM_PAR pointer to the font
|
||||
// u32 par2 SSEGM_PAR2 pointer to font gradient
|
||||
// u16 par3 LOW background color, HIGH font height
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// render font pixel mask
|
||||
.extern RenderTextMask // u32 RenderTextMask[512];
|
||||
|
||||
// extern "C" u8* RenderGText(u8* dbuf, int x, int y, int w, sSegm* segm)
|
||||
|
||||
// render 8-pixel gradient color text GF_GTEXT
|
||||
// R0 ... destination data buffer
|
||||
// R1 ... start X coordinate (in pixels, must be multiple of 4)
|
||||
// R2 ... start Y coordinate (in graphics lines)
|
||||
// R3 ... width to display (must be multiple of 4 and > 0)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to destination data buffer.
|
||||
// 320 pixels takes 8.3 us on 151 MHz.
|
||||
|
||||
// RenderGText: renders one scanline of 8-pixel-wide characters into the destination
// buffer (R0). Each character's font byte selects two 32-bit masks from
// RenderTextMask; a mask picks, per pixel, between the gradient colour word and the
// background colour. Rendering wraps to the start of the text row at the wrap width.
// R0 is advanced by every store and is left as the return value.
.thumb_func
|
||||
.global RenderGText
|
||||
RenderGText:
|
||||
|
||||
// push registers (R1-R3 are pushed so their stack slots can be reused as locals)
|
||||
push {r1-r7,lr}
|
||||
mov r4,r8
|
||||
push {r4}
|
||||
|
||||
// Stack content:
|
||||
// SP+0: R8
|
||||
// SP+4: R1 start X coordinate (later: base pointer to gradient array)
|
||||
// SP+8: R2 start Y coordinate (later: base pointer to text data row)
|
||||
// SP+12: R3 width to display
|
||||
// SP+16: R4
|
||||
// SP+20: R5
|
||||
// SP+24: R6
|
||||
// SP+28: R7
|
||||
// SP+32: LR
|
||||
// SP+36: video segment (later: wrap width in X direction)
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#36] // load video segment -> R4
|
||||
|
||||
// start divide Y/font height
// NOTE(review): the file header documents par3 as LOW = background color,
// HIGH = font height, and the background is read below as the byte at SSEGM_PAR3;
// this halfword load uses all 16 bits of par3 as the divisor - confirm intended.
|
||||
ldr r6,RenderGText_pSioBase // get address of SIO base -> R6
|
||||
str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate
|
||||
ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2
|
||||
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height
|
||||
|
||||
// - now we must wait at least 8 clock cycles to get result of division
//   (the [n] cycle counts of the next instructions add up to that delay)
|
||||
|
||||
// [6] get wrap width -> [SP+36]
|
||||
ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width
|
||||
movs r7,#3 // [1] mask to align to 32-bit
|
||||
bics r5,r7 // [1] align wrap (clear low 2 bits)
|
||||
str r5,[sp,#36] // [2] save wrap width
|
||||
|
||||
// [1] align X coordinate to 32-bit
|
||||
bics r1,r7 // [1]
|
||||
|
||||
// [3] align remaining width
|
||||
bics r3,r7 // [1]
|
||||
str r3,[sp,#12] // [2] save new width
|
||||
|
||||
// load result of division Y/font_height -> R5 Y relative to row, R2 index of row
|
||||
// Note: QUOTIENT must be read last
|
||||
ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row
|
||||
ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient -> R2, index of row
|
||||
|
||||
// pointer to font line -> R8
|
||||
lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long)
|
||||
ldr r3,[r4,#SSEGM_PAR] // get pointer to font
|
||||
add r3,r5 // line offset + font base -> pointer to current font line R3
|
||||
mov r8,r3
|
||||
|
||||
// base pointer to text data (without X) -> [SP+8], R2
|
||||
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
|
||||
muls r2,r5 // row index * WB -> offset of row in text buffer
|
||||
ldr r5,[r4,#SSEGM_DATA] // pointer to data
|
||||
add r2,r5 // base address of text buffer
|
||||
str r2,[sp,#8] // save pointer to text buffer
|
||||
|
||||
// base pointer to gradient array -> [SP+4], R3
|
||||
ldr r3,[r4,#SSEGM_PAR2] // pointer to gradient array
|
||||
str r3,[sp,#4] // save pointer to gradient array
|
||||
|
||||
// prepare pointers with X: gradient -> R3, text data -> R2
|
||||
add r3,r1 // pointer to source gradient array (offset by X bytes)
|
||||
lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width)
|
||||
add r2,r6 // pointer to source text buffer -> R2
|
||||
|
||||
// prepare background color, expand to 32 bits -> R4
|
||||
ldrb r4,[r4,#SSEGM_PAR3] // load background color (segment pointer in R4 is no longer needed)
|
||||
lsls r5,r4,#8 // shift background color << 8
|
||||
orrs r5,r4 // color expanded to 16 bits
|
||||
lsls r4,r5,#16 // shift 16-bit color << 16
|
||||
orrs r4,r5 // color expanded to 32 bits
|
||||
|
||||
// prepare pointer to conversion table -> LR
|
||||
ldr r5,RenderGText_Addr // get pointer to conversion table -> R5
|
||||
mov lr,r5 // conversion table -> LR
|
||||
|
||||
// ---- render 2nd half of first character
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate
|
||||
// R2 ... pointer to source text buffer
|
||||
// R3 ... pointer to gradient array
|
||||
// R4 ... background color (expanded to 32-bit)
|
||||
// R5..R7 ... (temporary)
|
||||
// R8 ... pointer to font line
|
||||
// LR ... pointer to conversion table
|
||||
// [SP+4] ... base pointer to gradient array
|
||||
// [SP+8] ... base pointer to text data (without X)
|
||||
// [SP+12] ... remaining width
|
||||
// [SP+36] ... wrap width
|
||||
|
||||
// check bit 2 of X coordinate - check if image starts with 2nd half of first character
|
||||
lsls r6,r1,#29 // move bit 2 of X coordinate into the sign bit (N flag)
|
||||
bpl 2f // bit 2 not set, starting even 4-pixels
|
||||
|
||||
// [6] load font sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
|
||||
adds r2,#1 // [1] shift pointer to source text buffer
|
||||
add r5,r8 // [1] pointer to font line
|
||||
ldrb r5,[r5] // [2] load font sample -> R5
|
||||
|
||||
// [3] load foreground color, XOR with background -> R6
|
||||
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply font sample * 8 (two 32-bit masks per sample)
|
||||
add r5,lr // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store second 4 pixels
|
||||
|
||||
// shift X coordinate
|
||||
adds r1,#4 // shift X coordinate
|
||||
|
||||
// check end of segment
|
||||
ldr r7,[sp,#36] // load wrap width
|
||||
cmp r1,r7 // end of segment?
|
||||
blo 1f
|
||||
movs r1,#0 // reset X coordinate
|
||||
ldr r2,[sp,#8] // get base pointer to text data -> R2
|
||||
ldr r3,[sp,#4] // get base pointer to gradient array -> R3
|
||||
|
||||
// shift remaining width
|
||||
1: ldr r7,[sp,#12] // get remaining width
|
||||
subs r7,#4 // shift width
|
||||
str r7,[sp,#12] // save new width
|
||||
|
||||
// prepare wrap width - start X -> R7
|
||||
2: ldr r7,[sp,#36] // load wrap width
|
||||
subs r7,r1 // pixels remaining to end of segment
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// Outer loop variables (* prepared before outer loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... number of characters to generate in one part of segment
|
||||
// R2 ... *pointer to source text buffer
|
||||
// R3 ... *pointer to gradient array
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... (temporary)
|
||||
// R7 ... *wrap width of this segment, later: temporary
|
||||
// R8 ... *pointer to font line
|
||||
// LR ... *pointer to conversion table
|
||||
// [SP+4] ... base pointer to gradient array
|
||||
// [SP+8] ... base pointer to text data (without X)
|
||||
// [SP+12] ... remaining width
|
||||
// [SP+36] ... wrap width
|
||||
|
||||
RenderGText_OutLoop:
|
||||
|
||||
// limit wrap width by total width -> R7
|
||||
ldr r6,[sp,#12] // get remaining width -> R6
|
||||
cmp r7,r6 // compare with wrap width
|
||||
bls 2f // width is OK
|
||||
mov r7,r6 // limit wrap width
|
||||
|
||||
// check if remain whole characters
|
||||
2: cmp r7,#8 // check number of remaining pixels
|
||||
bhs 5f // enough characters remain
|
||||
|
||||
// check if 1st part of last character remains
|
||||
cmp r7,#4 // check 1st part of last character
|
||||
blo 3f // all done
|
||||
|
||||
// ---- render 1st part of last character
// Entered either by falling through from the outer loop (R7 is 4..7 here) or from
// the end of the inner loop (R7 = wrap width reloaded from [SP+36]).
|
||||
|
||||
RenderGText_Last:
|
||||
|
||||
// [6] load font sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
|
||||
adds r2,#1 // [1] shift pointer to source text buffer
|
||||
add r5,r8 // [1] pointer to font line
|
||||
ldrb r5,[r5] // [2] load font sample -> R5
|
||||
|
||||
// [3] load foreground color, XOR with background -> R6
|
||||
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply font sample * 8
|
||||
add r5,lr // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert first 4 pixels (higher 4 bits)
|
||||
ldr r1,[r5,#0] // [2] load mask for higher 4 bits
|
||||
ands r1,r6 // [1] mask foreground color
|
||||
eors r1,r4 // [1] combine with background color
|
||||
stmia r0!,{r1} // [2] store first 4 pixels
|
||||
|
||||
// check if continue with next segment
// NOTE(review): on the fall-through entry R7 is below 8, so the branch is not taken
// and the routine returns without consulting or updating the remaining width at
// [SP+12]; confirm that a 4-pixel part followed by a wrap cannot occur.
|
||||
ldr r2,[sp,#8] // get base pointer to text data -> R2
|
||||
ldr r3,[sp,#4] // get base pointer to gradient array -> R3
|
||||
cmp r7,#4
|
||||
bhi RenderGText_OutLoop
|
||||
|
||||
// pop registers and return (R0 = pointer past the last stored pixel)
|
||||
3: pop {r4}
|
||||
mov r8,r4
|
||||
pop {r1-r7,pc}
|
||||
|
||||
// ---- prepare to render whole characters
|
||||
|
||||
// prepare number of whole characters to render -> R1
|
||||
5: lsrs r1,r7,#2 // shift to get number of characters*2 (number of 4-pixel groups)
|
||||
lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5
|
||||
subs r6,r5 // get remaining width (R6 = remaining width loaded in the outer loop)
|
||||
str r6,[sp,#12] // save new remaining width
|
||||
subs r1,#1 // number of characters*2 - 1
|
||||
|
||||
// ---- [28*N-1] start inner loop, render characters in one part of segment
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *number of characters to generate*2 - 1 (loop counter)
|
||||
// R2 ... *pointer to source text buffer
|
||||
// R3 ... *pointer to gradient array
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... foreground color
|
||||
// R7 ... font sample
|
||||
// R8 ... *pointer to font line
|
||||
// LR ... *pointer to conversion table
|
||||
// [SP+4] ... base pointer to gradient array
|
||||
// [SP+8] ... base pointer to text data (without X)
|
||||
// [SP+12] ... remaining width
|
||||
// [SP+36] ... wrap width
|
||||
|
||||
RenderGText_InLoop:
|
||||
|
||||
// [6] load font sample -> R7
|
||||
ldrb r7,[r2,#0] // [2] load character from source text buffer -> R7
|
||||
adds r2,#1 // [1] shift pointer to source text buffer
|
||||
add r7,r8 // [1] pointer to font line
|
||||
ldrb r7,[r7] // [2] load font sample -> R7
|
||||
|
||||
// [3] load foreground color, XOR with background -> R6
|
||||
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [2] prepare conversion table -> R7
|
||||
lsls r7,#3 // [1] multiply font sample * 8
|
||||
add r7,lr // [1] add pointer to conversion table
|
||||
|
||||
// [4] convert first 4 pixels (higher 4 bits) -> R5
|
||||
ldr r5,[r7,#0] // [2] load mask for higher 4 bits
|
||||
ands r5,r6 // [1] mask foreground color
|
||||
eors r5,r4 // [1] combine with background color
|
||||
|
||||
// [3] load foreground color, XOR with background -> R6
|
||||
ldmia r3!,{r6} // [2] load foreground color from gradient buffer
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// [7] convert and store second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r7,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r5,r7} // [3] store 8 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r1,#2 // [1] shift loop counter
|
||||
bhi RenderGText_InLoop // [1,2] > 0, render next whole character
|
||||
|
||||
// ---- end inner loop, continue with last character, or start new part
|
||||
|
||||
// continue to outer loop
// The flags still come from the "subs" above (the load does not change them):
// counter == 0 means an odd number of 4-pixel groups, so half a character remains.
|
||||
ldr r7,[sp,#36] // load wrap width
|
||||
beq RenderGText_Last // render 1st half of last character
|
||||
ldr r2,[sp,#8] // get base pointer to text data -> R2
|
||||
ldr r3,[sp,#4] // get base pointer to gradient array -> R3
|
||||
b RenderGText_OutLoop // go back to outer loop
|
||||
|
||||
.align 2
|
||||
RenderGText_Addr:
|
||||
.word RenderTextMask
|
||||
RenderGText_pSioBase:
|
||||
.word SIO_BASE // address of SIO base
|
||||
|
|
@ -1,431 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_LEVEL
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// render font pixel mask
|
||||
.extern RenderTextMask // u32 RenderTextMask[512];
|
||||
|
||||
// extern "C" u8* RenderLevel(u8* dbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render level graph GF_LEVEL
|
||||
// dbuf ... destination data buffer
|
||||
// x ... start X coordinate (must be multiple of 4)
|
||||
// y ... start Y coordinate
|
||||
// w ... width of this segment (must be multiple of 4)
|
||||
// segm ... video segment
|
||||
// Output new dbuf pointer.
|
||||
// 320 pixels takes 14 us on 151 MHz.
|
||||
|
||||
.thumb_func
|
||||
.global RenderLevel
|
||||
RenderLevel:
|
||||
|
||||
// push registers
|
||||
push {r1-r7,lr}
|
||||
|
||||
// Input registers and stack content:
|
||||
// R0 ... pointer to destination data buffer
|
||||
// SP+0: R1 start X coordinate (later: zero level)
|
||||
// SP+4: R2 start Y coordinate (later: base pointer to sample data)
|
||||
// SP+8: R3 width to display
|
||||
// SP+12: R4
|
||||
// SP+16: R5
|
||||
// SP+20: R6
|
||||
// SP+24: R7
|
||||
// SP+28: LR
|
||||
// SP+32: video segment (later: wrap width in X direction)
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#32] // load video segment -> R4
|
||||
|
||||
// get wrap width -> [SP+32]
|
||||
ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width
|
||||
movs r7,#3 // mask to align to 32-bit
|
||||
bics r5,r7 // align wrap
|
||||
str r5,[sp,#32] // save wrap width
|
||||
|
||||
// align X coordinate to 32-bit -> R1
|
||||
bics r1,r7
|
||||
|
||||
// align remaining width -> [SP+8]
|
||||
bics r3,r7
|
||||
str r3,[sp,#8] // save new width
|
||||
|
||||
// current Y in direction from bottom to up -> R5
|
||||
ldrh r5,[r4,#SSEGM_WRAPY] // get wrap height
|
||||
subs r5,#1 // wrapy - 1
|
||||
subs r5,r2 // subtract Y, get Y relative to bottom -> R5
|
||||
|
||||
// get zero level -> [SP+0]
|
||||
ldrb r3,[r4,#SSEGM_PAR2] // get zero level
|
||||
str r3,[sp,#0] // save zero level
|
||||
|
||||
// base pointer to sample data (without X) -> [SP+4], R2
|
||||
ldr r2,[r4,#SSEGM_DATA] // pointer to sample data
|
||||
str r2,[sp,#4] // save pointer to sample buffer
|
||||
|
||||
// prepare pointer to sample data with X -> R2
|
||||
add r2,r1 // pointer to source sample buffer -> R2
|
||||
|
||||
// prepare foreground color, expand to 32-bit -> R6
|
||||
ldrb r6,[r4,#SSEGM_PAR+1] // load foreground color
|
||||
lsls r3,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r3,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r3,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r3 // [1] color expanded to 32 bits
|
||||
|
||||
// prepare background color, expand to 32 bits -> R4
|
||||
ldrb r4,[r4,#SSEGM_PAR] // load background color
|
||||
lsls r3,r4,#8 // shift background color << 8
|
||||
orrs r3,r4 // color expanded to 16 bits
|
||||
lsls r4,r3,#16 // shift 16-bit color << 16
|
||||
orrs r4,r3 // color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// prepare pointer to conversion table -> LR
|
||||
ldr r3,RenderLevel_Addr // get pointer to conversion table -> R5
|
||||
mov lr,r3 // conversion table -> LR
|
||||
|
||||
// prepare wrap width - start X -> R7
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
subs r7,r1 // pixels remaining to end of segment
|
||||
|
||||
// last 4-pixels
|
||||
cmp r7,#4
|
||||
bhi RenderLevel_OutLoop
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
b RenderLevel_Last // render last 4-pixels of first segment
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// Outer loop variables (* prepared before outer loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... number of 4-pixels to generate in one part of segment
|
||||
// R2 ... *pointer to source sample buffer
|
||||
// R3 ... remaining width, later: (temporary)
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... *current line Y (in direction from bottom to up)
|
||||
// R6 ... *foreground color (expanded to 32-bit)
|
||||
// R7 ... *wrap width of this segment, later: (temporary)
|
||||
// LR ... *pointer to conversion table
|
||||
// [SP+0] ... *zero level
|
||||
// [SP+4] ... *base pointer to sample data (without X)
|
||||
// [SP+8] ... *remaining width
|
||||
// [SP+32] ... *wrap width
|
||||
|
||||
RenderLevel_OutLoop:
|
||||
|
||||
// limit wrap width by total width -> R7
|
||||
ldr r3,[sp,#8] // get remaining width
|
||||
cmp r7,r3 // compare with wrap width
|
||||
bls 2f // width is OK
|
||||
mov r7,r3 // limit wrap width
|
||||
|
||||
// check number of pixels
|
||||
2: cmp r7,#8 // check number of remaining pixels
|
||||
bhs 5f // enough pixels remain to render 8-pixels
|
||||
|
||||
// check last 4-pixels
|
||||
cmp r7,#4 // check last 4-pixels
|
||||
blo 3f // all done
|
||||
|
||||
// ---- render last 4 pixels
|
||||
|
||||
RenderLevel_Last:
|
||||
|
||||
// check half of graph
|
||||
ldr r3,[sp,#0] // get zero level
|
||||
cmp r5,r3 // check current line
|
||||
blo RenderLevel_Last2 // bottom half of graph
|
||||
|
||||
// ---- top half
|
||||
|
||||
// [1] clear sample accumulator
|
||||
movs r1,#0 // [1] clear sample accumulator
|
||||
|
||||
// [4] get sample 0
|
||||
ldrb r3,[r2,#0] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
|
||||
|
||||
// [4] get sample 1
|
||||
ldrb r3,[r2,#1] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
|
||||
|
||||
// [4] get sample 2
|
||||
ldrb r3,[r2,#2] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
|
||||
|
||||
// [4] get sample 3
|
||||
ldrb r3,[r2,#3] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
|
||||
adds r2,#4 // [1] shift pointer to source buffer
|
||||
|
||||
// [2] prepare conversion table -> R1
|
||||
lsls r1,#3 // [1] multiply sample * 8
|
||||
add r1,lr // [1] add pointer to conversion table
|
||||
|
||||
// [7] convert 4 pixels (lower 4 bits)
|
||||
ldr r1,[r1,#4] // [2] load mask for lower 4 bits
|
||||
ands r1,r6 // [1] mask foreground color
|
||||
eors r1,r4 // [1] combine with background color
|
||||
stmia r0!,{r1} // [3] store 4 pixels
|
||||
|
||||
b 7f
|
||||
|
||||
// ---- bottom half
|
||||
|
||||
RenderLevel_Last2:
|
||||
|
||||
// [1] clear sample accumulator
|
||||
movs r1,#0 // [1] clear sample accumulator
|
||||
|
||||
// [4] get sample 0
|
||||
ldrb r3,[r2,#0] // [2] get data sample -> R3
|
||||
cmp r5,r3 // [1] compare sample with current line
|
||||
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
|
||||
|
||||
// [4] get sample 1
|
||||
ldrb r3,[r2,#1] // [2] get data sample -> R3
|
||||
cmp r5,r3 // [1] compare sample with current line
|
||||
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
|
||||
|
||||
// [4] get sample 2
|
||||
ldrb r3,[r2,#2] // [2] get data sample -> R3
|
||||
cmp r5,r3 // [1] compare sample with current line
|
||||
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
|
||||
|
||||
// [4] get sample 3
|
||||
ldrb r3,[r2,#3] // [2] get data sample -> R3
|
||||
cmp r5,r3 // [1] compare sample with current line
|
||||
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
|
||||
adds r2,#4 // [1] shift pointer to source buffer
|
||||
|
||||
// [2] prepare conversion table -> R1
|
||||
lsls r1,#3 // [1] multiply sample * 8
|
||||
add r1,lr // [1] add pointer to conversion table
|
||||
|
||||
// [7] convert 4 pixels (lower 4 bits)
|
||||
ldr r1,[r1,#4] // [2] load mask for lower 4 bits
|
||||
ands r1,r6 // [1] mask foreground color
|
||||
eors r1,r4 // [1] combine with background color
|
||||
stmia r0!,{r1} // [3] store 4 pixels
|
||||
|
||||
|
||||
// check if continue with next segment
|
||||
7: ldr r2,[sp,#4] // get base pointer to sample data -> R2
|
||||
cmp r7,#4
|
||||
bhi RenderLevel_OutLoop
|
||||
|
||||
// pop registers and return
|
||||
3: pop {r1-r7,pc}
|
||||
|
||||
// ---- prepare to render 8-pixels
|
||||
|
||||
// prepare number of whole 4-pixels to render -> R1
|
||||
5: lsrs r1,r7,#2 // shift width to get number of 4-pixels
|
||||
lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7
|
||||
subs r3,r7 // get remaining width
|
||||
str r3,[sp,#8] // save new remaining width
|
||||
subs r1,#1 // number of 4-pixels - 1
|
||||
|
||||
// check half of graph
|
||||
ldr r3,[sp,#0] // get zero level
|
||||
cmp r5,r3 // check current line
|
||||
blo RenderLevel_InLoopBot // bottom half of graph
|
||||
|
||||
// ---- [50*N-1] start inner loop, render in one part of segment - top half of graph
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *number of 4-pixels to generate*2 - 1 (loop counter)
|
||||
// R2 ... *pointer to source sample buffer
|
||||
// R3 ... sample
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... *current line Y (in direction from bottom to up)
|
||||
// R6 ... *foreground color (expanded to 32-bit)
|
||||
// R7 ... sample accumulator, conversion table
|
||||
// LR ... *pointer to conversion table
|
||||
// [SP+0] ... *zero level
|
||||
// [SP+4] ... *base pointer to sample data (without X)
|
||||
// [SP+8] ... *remaining width
|
||||
// [SP+32] ... *wrap width
|
||||
|
||||
RenderLevel_InLoopTop: // render 8 pixels in one loop step, top half of graph
|
||||
|
||||
// [1] clear sample accumulator
|
||||
movs r7,#0 // [1] clear sample accumulator
|
||||
|
||||
// [4] get sample 0
|
||||
ldrb r3,[r2,#0] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 1
|
||||
ldrb r3,[r2,#1] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 2
|
||||
ldrb r3,[r2,#2] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 3
|
||||
ldrb r3,[r2,#3] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 4
|
||||
ldrb r3,[r2,#4] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 5
|
||||
ldrb r3,[r2,#5] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 6
|
||||
ldrb r3,[r2,#6] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [5] get sample 7
|
||||
ldrb r3,[r2,#7] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
adds r2,#8 // [1] shift pointer to source buffer
|
||||
|
||||
// [2] prepare conversion table -> R7
|
||||
lsls r7,#3 // [1] multiply sample * 8
|
||||
add r7,lr // [1] add pointer to conversion table
|
||||
|
||||
// [4] convert first 4 pixels (higher 4 bits)
|
||||
ldr r3,[r7,#0] // [2] load mask for higher 4 bits
|
||||
ands r3,r6 // [1] mask foreground color
|
||||
eors r3,r4 // [1] combine with background color
|
||||
|
||||
// [7] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r7,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r3,r7} // [3] store second 4 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r1,#2 // [1] shift loop counter
|
||||
bhi RenderLevel_InLoopTop // [1,2] > 0, render next whole 8-pixels
|
||||
|
||||
// ---- end inner loop, continue with last 4-pixels, or start new part
|
||||
|
||||
// continue to outer loop
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
8: beq RenderLevel_Last // render last 4-pixels
|
||||
ldr r2,[sp,#4] // get base pointer to sample data -> R2
|
||||
b RenderLevel_OutLoop // go back to outer loop
|
||||
|
||||
// ---- [50*N-1] start inner loop, render in one part of segment - bottom half of graph
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *number of 4-pixels to generate*2 - 1 (loop counter)
|
||||
// R2 ... *pointer to source sample buffer
|
||||
// R3 ... sample
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... *current line Y (in direction from bottom to up)
|
||||
// R6 ... *foreground color (expanded to 32-bit)
|
||||
// R7 ... sample accumulator, conversion table
|
||||
// LR ... *pointer to conversion table
|
||||
// [SP+0] ... *zero level
|
||||
// [SP+4] ... *base pointer to sample data (without X)
|
||||
// [SP+8] ... *remaining width
|
||||
// [SP+32] ... *wrap width
|
||||
|
||||
RenderLevel_InLoopBot: // render 8 pixels in one loop step, bottom half of graph
|
||||
|
||||
// [1] clear sample accumulator
|
||||
movs r7,#0 // [1] clear sample accumulator
|
||||
|
||||
// [4] get sample 0
|
||||
ldrb r3,[r2,#0] // [2] get data sample -> R3
|
||||
cmp r5,r3 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 1
|
||||
ldrb r3,[r2,#1] // [2] get data sample -> R3
|
||||
cmp r5,r3 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 2
|
||||
ldrb r3,[r2,#2] // [2] get data sample -> R3
|
||||
cmp r5,r3 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 3
|
||||
ldrb r3,[r2,#3] // [2] get data sample -> R3
|
||||
cmp r5,r3 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 4
|
||||
ldrb r3,[r2,#4] // [2] get data sample -> R3
|
||||
cmp r5,r3 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 5
|
||||
ldrb r3,[r2,#5] // [2] get data sample -> R3
|
||||
cmp r5,r3 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 6
|
||||
ldrb r3,[r2,#6] // [2] get data sample -> R3
|
||||
cmp r5,r3 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [5] get sample 7
|
||||
ldrb r3,[r2,#7] // [2] get data sample -> R3
|
||||
cmp r5,r3 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
adds r2,#8 // [1] shift pointer to source buffer
|
||||
|
||||
// [2] prepare conversion table -> R7
|
||||
lsls r7,#3 // [1] multiply sample * 8
|
||||
add r7,lr // [1] add pointer to conversion table
|
||||
|
||||
// [4] convert first 4 pixels (higher 4 bits)
|
||||
ldr r3,[r7,#0] // [2] load mask for higher 4 bits
|
||||
ands r3,r6 // [1] mask foreground color
|
||||
eors r3,r4 // [1] combine with background color
|
||||
|
||||
// [7] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r7,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r3,r7} // [3] store second 4 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r1,#2 // [1] shift loop counter
|
||||
bhi RenderLevel_InLoopBot // [1,2] > 0, render next whole 8-pixels
|
||||
|
||||
// ---- end inner loop, continue with last 4-pixels, or start new part
|
||||
|
||||
// continue to outer loop
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
beq 8b // render last 4-pixels
|
||||
ldr r2,[sp,#4] // get base pointer to sample data -> R2
|
||||
b RenderLevel_OutLoop // go back to outer loop
|
||||
|
||||
.align 2
|
||||
RenderLevel_Addr:
|
||||
.word RenderTextMask
|
||||
|
|
@ -1,287 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_LEVELGRAD
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// render font pixel mask
|
||||
.extern RenderTextMask // u32 RenderTextMask[512];
|
||||
|
||||
// extern "C" u8* RenderLevelGrad(u8* dbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// Render GF_LEVELGRAD: level graph whose foreground and background colors for the current line are read from two gradient tables
|
||||
// dbuf ... destination data buffer
|
||||
// x ... start X coordinate (must be multiple of 4; the low 2 bits are cleared below)
|
||||
// y ... start Y coordinate
|
||||
// w ... width of this segment (must be multiple of 4; the low 2 bits are cleared below)
|
||||
// segm ... video segment
|
||||
// Returns the new dbuf pointer (R0 is advanced by every store and is not restored on exit).
|
||||
// 320 pixels take 14 us at 151 MHz.
|
||||
|
||||
.thumb_func
|
||||
.global RenderLevelGrad
|
||||
RenderLevelGrad:
|
||||
|
||||
// push registers (R1..R3 are pushed too, so their stack slots can be reused as local variables)
|
||||
push {r1-r7,lr}
|
||||
|
||||
// Input registers and stack content:
|
||||
// R0 ... pointer to destination data buffer
|
||||
// SP+0: R1 start X coordinate
|
||||
// SP+4: R2 start Y coordinate (later: base pointer to sample data)
|
||||
// SP+8: R3 width to display
|
||||
// SP+12: R4
|
||||
// SP+16: R5
|
||||
// SP+20: R6
|
||||
// SP+24: R7
|
||||
// SP+28: LR
|
||||
// SP+32: video segment (later: wrap width in X direction)
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#32] // load video segment -> R4
|
||||
|
||||
// get wrap width -> [SP+32]
|
||||
ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width
|
||||
movs r7,#3 // mask of the low 2 bits (align to multiple of 4)
|
||||
bics r5,r7 // align wrap width down to multiple of 4
|
||||
str r5,[sp,#32] // save wrap width (overwrites the segment pointer slot)
|
||||
|
||||
// align X coordinate down to multiple of 4 -> R1
|
||||
bics r1,r7
|
||||
|
||||
// align remaining width down to multiple of 4 -> [SP+8]
|
||||
bics r3,r7
|
||||
str r3,[sp,#8] // save new width
|
||||
|
||||
// current Y in direction from bottom to up -> R5
|
||||
ldrh r5,[r4,#SSEGM_WRAPY] // get wrap height
|
||||
subs r5,#1 // wrapy - 1
|
||||
subs r5,r2 // subtract Y, get Y relative to bottom -> R5
|
||||
|
||||
// base pointer to sample data (without X) -> [SP+4], R2
|
||||
ldr r2,[r4,#SSEGM_DATA] // pointer to sample data
|
||||
str r2,[sp,#4] // save pointer to sample buffer
|
||||
|
||||
// prepare pointer to sample data with X -> R2
|
||||
add r2,r1 // pointer to source sample buffer -> R2 (one sample byte per pixel)
|
||||
|
||||
// prepare foreground color for this line, expand to 32-bit -> R6
|
||||
ldr r6,[r4,#SSEGM_PAR] // pointer to gradient 1
|
||||
ldrb r6,[r6,r5] // load foreground color: gradient 1 byte indexed by line R5
|
||||
lsls r3,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r3,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r3,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r3 // [1] color expanded to 32 bits
|
||||
|
||||
// prepare background color for this line, expand to 32 bits -> R4
|
||||
ldr r4,[r4,#SSEGM_PAR2] // pointer to gradient 2 (segment pointer in R4 is no longer needed)
|
||||
ldrb r4,[r4,r5] // load background color: gradient 2 byte indexed by line R5
|
||||
lsls r3,r4,#8 // shift background color << 8
|
||||
orrs r3,r4 // color expanded to 16 bits
|
||||
lsls r4,r3,#16 // shift 16-bit color << 16
|
||||
orrs r4,r3 // color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6 (from here on R6 holds fg XOR bg, not the plain foreground)
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// prepare pointer to conversion table -> LR
|
||||
ldr r3,RenderLevelGrad_Addr // get pointer to conversion table -> R3
|
||||
mov lr,r3 // conversion table -> LR
|
||||
|
||||
// prepare wrap width - start X -> R7
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
subs r7,r1 // pixels remaining to end of segment
|
||||
|
||||
// at most 4 pixels left before the wrap point: render them via the 4-pixel path. NOTE(review): that path does not subtract 4 from the remaining width at [SP+8] - confirm this cannot emit 4 pixels more than w
|
||||
cmp r7,#4
|
||||
bhi RenderLevelGrad_OutLoop
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
b RenderLevelGrad_Last // render last 4-pixels of first segment
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// Outer loop variables (* prepared before outer loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... number of 4-pixels to generate in one part of segment
|
||||
// R2 ... *pointer to source sample buffer
|
||||
// R3 ... remaining width, later: (temporary)
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... *current line Y (in direction from bottom to up)
|
||||
// R6 ... *foreground XOR background color (expanded to 32-bit)
|
||||
// R7 ... *wrap width of this segment, later: (temporary)
|
||||
// LR ... *pointer to conversion table
|
||||
// [SP+0] ... pushed X coordinate (not referenced in this routine)
|
||||
// [SP+4] ... *base pointer to sample data (without X)
|
||||
// [SP+8] ... *remaining width
|
||||
// [SP+32] ... *wrap width
|
||||
|
||||
RenderLevelGrad_OutLoop:
|
||||
|
||||
// limit wrap width by total width -> R7
|
||||
ldr r3,[sp,#8] // get remaining width
|
||||
cmp r7,r3 // compare with wrap width
|
||||
bls 2f // width is OK
|
||||
mov r7,r3 // limit wrap width
|
||||
|
||||
// check number of pixels
|
||||
2: cmp r7,#8 // check number of remaining pixels
|
||||
bhs 5f // enough pixels remain to render 8-pixels
|
||||
|
||||
// check last 4-pixels
|
||||
cmp r7,#4 // check last 4-pixels
|
||||
blo 3f // fewer than 4 pixels left: all done
|
||||
|
||||
// ---- render last 4 pixels
|
||||
|
||||
RenderLevelGrad_Last:
|
||||
|
||||
// [1] clear sample accumulator
|
||||
movs r1,#0 // [1] clear sample accumulator
|
||||
|
||||
// [4] get sample 0
|
||||
ldrb r3,[r2,#0] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line (carry set when sample >= line, unsigned)
|
||||
adcs r1,r1 // [1] R1 = R1*2 + carry: shift carry of comparison into accumulator -> R1
|
||||
|
||||
// [4] get sample 1
|
||||
ldrb r3,[r2,#1] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
|
||||
|
||||
// [4] get sample 2
|
||||
ldrb r3,[r2,#2] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
|
||||
|
||||
// [4] get sample 3
|
||||
ldrb r3,[r2,#3] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
|
||||
adds r2,#4 // [1] shift pointer to source buffer
|
||||
|
||||
// [2] prepare conversion table -> R1
|
||||
lsls r1,#3 // [1] multiply 4-bit result * 8 (each table entry is two 32-bit words)
|
||||
add r1,lr // [1] add pointer to conversion table
|
||||
|
||||
// [7] convert 4 pixels (lower 4 bits)
|
||||
ldr r1,[r1,#4] // [2] load mask for lower 4 bits (second word of the table entry)
|
||||
ands r1,r6 // [1] AND mask with (foreground XOR background)
|
||||
eors r1,r4 // [1] XOR with background: foreground where mask bits are set, background where clear
|
||||
stmia r0!,{r1} // [3] store 4 pixels
|
||||
|
||||
// check if continue with next segment
|
||||
7: ldr r2,[sp,#4] // get base pointer to sample data -> R2 (restart samples at X = 0)
|
||||
cmp r7,#4
|
||||
bhi RenderLevelGrad_OutLoop
|
||||
|
||||
// pop registers and return (R0 is not in the list, it carries the return value)
|
||||
3: pop {r1-r7,pc}
|
||||
|
||||
// ---- prepare to render 8-pixels
|
||||
|
||||
// prepare number of whole 4-pixels to render -> R1
|
||||
5: lsrs r1,r7,#2 // shift width to get number of 4-pixels
|
||||
lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7
|
||||
subs r3,r7 // get remaining width
|
||||
str r3,[sp,#8] // save new remaining width
|
||||
subs r1,#1 // number of 4-pixels - 1
|
||||
|
||||
// ---- [50*N-1] start inner loop, render in one part of segment
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *number of 4-pixel groups to generate - 1 (loop counter, decremented by 2 per 8 pixels)
|
||||
// R2 ... *pointer to source sample buffer
|
||||
// R3 ... sample
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... *current line Y (in direction from bottom to up)
|
||||
// R6 ... *foreground XOR background color (expanded to 32-bit)
|
||||
// R7 ... sample accumulator, conversion table
|
||||
// LR ... *pointer to conversion table
|
||||
// [SP+0] ... pushed X coordinate (not referenced in this routine)
|
||||
// [SP+4] ... *base pointer to sample data (without X)
|
||||
// [SP+8] ... *remaining width
|
||||
// [SP+32] ... *wrap width
|
||||
|
||||
RenderLevelGrad_InLoopTop: // render 8 pixels in one loop step (this routine has a single inner loop)
|
||||
|
||||
// [1] clear sample accumulator
|
||||
movs r7,#0 // [1] clear sample accumulator
|
||||
|
||||
// [4] get sample 0
|
||||
ldrb r3,[r2,#0] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line (carry set when sample >= line, unsigned)
|
||||
adcs r7,r7 // [1] R7 = R7*2 + carry: shift carry of comparison into accumulator -> R7
|
||||
|
||||
// [4] get sample 1
|
||||
ldrb r3,[r2,#1] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 2
|
||||
ldrb r3,[r2,#2] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 3
|
||||
ldrb r3,[r2,#3] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 4
|
||||
ldrb r3,[r2,#4] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 5
|
||||
ldrb r3,[r2,#5] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [4] get sample 6
|
||||
ldrb r3,[r2,#6] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [5] get sample 7
|
||||
ldrb r3,[r2,#7] // [2] get data sample -> R3
|
||||
cmp r3,r5 // [1] compare sample with current line
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
adds r2,#8 // [1] shift pointer to source buffer
|
||||
|
||||
// [2] prepare conversion table -> R7
|
||||
lsls r7,#3 // [1] multiply 8-bit result * 8 (each table entry is two 32-bit words)
|
||||
add r7,lr // [1] add pointer to conversion table
|
||||
|
||||
// [4] convert first 4 pixels (higher 4 bits)
|
||||
ldr r3,[r7,#0] // [2] load mask for higher 4 bits (first word of the table entry)
|
||||
ands r3,r6 // [1] AND mask with (foreground XOR background)
|
||||
eors r3,r4 // [1] XOR with background: foreground where mask bits are set, background where clear
|
||||
|
||||
// [7] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r7,#4] // [2] load mask for lower 4 bits (second word of the table entry)
|
||||
ands r7,r6 // [1] AND mask with (foreground XOR background)
|
||||
eors r7,r4 // [1] XOR with background color
|
||||
stmia r0!,{r3,r7} // [3] store both words (all 8 pixels)
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r1,#2 // [1] shift loop counter (sets the flags tested by BHI and by BEQ below)
|
||||
bhi RenderLevelGrad_InLoopTop // [1,2] > 0, render next whole 8-pixels
|
||||
|
||||
// ---- end inner loop, continue with last 4-pixels, or start new part
|
||||
|
||||
// continue to outer loop
|
||||
ldr r7,[sp,#32] // load wrap width (LDR does not change the flags left by SUBS)
|
||||
8: beq RenderLevelGrad_Last // counter reached exactly 0: one 4-pixel group is left, render last 4-pixels
|
||||
ldr r2,[sp,#4] // get base pointer to sample data -> R2
|
||||
b RenderLevelGrad_OutLoop // go back to outer loop
|
||||
|
||||
.align 2
|
||||
RenderLevelGrad_Addr:
|
||||
.word RenderTextMask // address of the conversion table, loaded PC-relative near the top of the routine
|
||||
|
|
@ -1,288 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_MTEXT
|
||||
//
|
||||
// ****************************************************************************
|
||||
// u32 par SSEGM_PAR pointer to the font
|
||||
// u32 par2 SSEGM_PAR2 LOW background color, HIGH foreground color
|
||||
// u16 par3 font height
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// render font pixel mask
|
||||
.extern RenderTextMask // u32 RenderTextMask[512];
|
||||
|
||||
// extern "C" u8* RenderMText(u8* dbuf, int x, int y, int w, sSegm* segm)
|
||||
|
||||
// render 8-pixel mono text GF_MTEXT
|
||||
// R0 ... destination data buffer
|
||||
// R1 ... start X coordinate (in pixels, must be multiple of 4)
|
||||
// R2 ... start Y coordinate (in graphics lines)
|
||||
// R3 ... width to display (must be multiple of 4 and > 0)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to destination data buffer.
|
||||
// 320 pixels takes 6.9 us on 151 MHz.
|
||||
|
||||
.thumb_func
|
||||
.global RenderMText
|
||||
RenderMText:
|
||||
|
||||
// push registers
|
||||
push {r1-r7,lr}
|
||||
|
||||
// Stack content:
|
||||
// SP+0: R1 start X coordinate
|
||||
// SP+4: R2 start Y coordinate (later: base pointer to text data row)
|
||||
// SP+8: R3 width to display
|
||||
// SP+12: R4
|
||||
// SP+16: R5
|
||||
// SP+20: R6
|
||||
// SP+24: R7
|
||||
// SP+28: LR
|
||||
// SP+32: video segment (later: wrap width in X direction)
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#32] // load video segment -> R4
|
||||
|
||||
// start divide Y/font height
|
||||
ldr r6,RenderMText_pSioBase // get address of SIO base -> R6
|
||||
str r2,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate
|
||||
ldrh r2,[r4,#SSEGM_PAR3] // font height -> R2
|
||||
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, font height
|
||||
|
||||
// - now we must wait at least 8 clock cycles to get result of division
|
||||
|
||||
// [6] get wrap width -> [SP+32]
|
||||
ldrh r5,[r4,#SSEGM_WRAPX] // [2] get wrap width
|
||||
movs r7,#3 // [1] mask to align to 32-bit
|
||||
bics r5,r7 // [1] align wrap
|
||||
str r5,[sp,#32] // [2] save wrap width
|
||||
|
||||
// [1] align X coordinate to 32-bit
|
||||
bics r1,r7 // [1]
|
||||
|
||||
// [3] align remaining width
|
||||
bics r3,r7 // [1]
|
||||
str r3,[sp,#8] // [2] save new width
|
||||
|
||||
// load result of division Y/font_height -> R5 Y relative to row, R2 index of row
|
||||
// Note: QUOTIENT must be read last
|
||||
ldr r5,[r6,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R5, Y coordinate relative to current row
|
||||
ldr r2,[r6,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R2, index of row
|
||||
|
||||
// pointer to font line -> R3
|
||||
lsls r5,#8 // multiply Y relative * 256 (1 font line is 256 bytes long)
|
||||
ldr r3,[r4,#SSEGM_PAR] // get pointer to font
|
||||
add r3,r5 // line offset + font base -> pointer to current font line R3
|
||||
|
||||
// base pointer to text data (without X) -> [SP+4], R2
|
||||
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
|
||||
muls r2,r5 // Y * WB -> offset of row in text buffer
|
||||
ldr r5,[r4,#SSEGM_DATA] // pointer to data
|
||||
add r2,r5 // base address of text buffer
|
||||
str r2,[sp,#4] // save pointer to text buffer
|
||||
|
||||
// prepare pointer to text data with X -> R2
|
||||
lsrs r6,r1,#3 // convert X to character index (1 character is 8 pixels width)
|
||||
add r2,r6 // pointer to source text buffer -> R2
|
||||
|
||||
// prepare foreground color, expand to 32-bit -> R6
|
||||
ldrb r6,[r4,#SSEGM_PAR2+1] // load foreground color
|
||||
lsls r7,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r7,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r7,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r7 // [1] color expanded to 32 bits
|
||||
|
||||
// prepare background color, expand to 32 bits -> R4
|
||||
ldrb r4,[r4,#SSEGM_PAR2] // load background color
|
||||
lsls r5,r4,#8 // shift background color << 8
|
||||
orrs r5,r4 // color expanded to 16 bits
|
||||
lsls r4,r5,#16 // shift 16-bit color << 16
|
||||
orrs r4,r5 // color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6
|
||||
eors r6,r4 // [1] XOR foreground color with background color
|
||||
|
||||
// prepare pointer to conversion table -> LR
|
||||
ldr r5,RenderMText_Addr // get pointer to conversion table -> R5
|
||||
mov lr,r5 // conversion table -> LR
|
||||
|
||||
// ---- render 2nd half of first character
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate
|
||||
// R2 ... pointer to source text buffer
|
||||
// R3 ... pointer to font line
|
||||
// R4 ... background color (expanded to 32-bit)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... foreground color (expanded to 32-bit)
|
||||
// R7 ... (temporary)
|
||||
// LR ... pointer to conversion table
|
||||
// [SP+4] ... base pointer to text data (without X)
|
||||
// [SP+8] ... remaining width
|
||||
// [SP+32] ... wrap width
|
||||
|
||||
// check bit 2 of X coordinate - check if image starts with 2nd half of first character
|
||||
lsls r5,r1,#29 // check bit 2 of X coordinate
|
||||
bpl 2f // bit 2 not set, starting even 4-pixels
|
||||
|
||||
// [5] load font sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
|
||||
ldrb r5,[r3,r5] // [2] load font sample -> R5
|
||||
adds r2,#1 // [1] shift pointer to source text buffer
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply font sample * 8
|
||||
add r5,lr // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store second 4 pixels
|
||||
|
||||
// shift X coordinate
|
||||
adds r1,#4 // shift X coordinate
|
||||
|
||||
// check end of segment
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
cmp r1,r7 // end of segment?
|
||||
blo 1f
|
||||
movs r1,#0 // reset X coordinate
|
||||
ldr r2,[sp,#4] // get base pointer to text data -> R2
|
||||
|
||||
// shift remaining width
|
||||
1: ldr r7,[sp,#8] // get remaining width
|
||||
subs r7,#4 // shift width
|
||||
str r7,[sp,#8] // save new width
|
||||
|
||||
// prepare wrap width - start X -> R7
|
||||
2: ldr r7,[sp,#32] // load wrap width
|
||||
subs r7,r1 // pixels remaining to end of segment
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// Outer loop variables (* prepared before outer loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... number of characters to generate in one part of segment
|
||||
// R2 ... *pointer to source text buffer
|
||||
// R3 ... *pointer to font line
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... *foreground color (expanded to 32-bit)
|
||||
// R7 ... *wrap width of this segment, later: temporary
|
||||
// LR ... *pointer to conversion table
|
||||
// [SP+4] ... *base pointer to text data (without X)
|
||||
// [SP+8] ... *remaining width
|
||||
// [SP+32] ... *wrap width
|
||||
|
||||
RenderMText_OutLoop:
|
||||
|
||||
// limit wrap width by total width -> R7
|
||||
ldr r5,[sp,#8] // get remaining width
|
||||
cmp r7,r5 // compare with wrap width
|
||||
bls 2f // width is OK
|
||||
mov r7,r5 // limit wrap width
|
||||
|
||||
// check if remain whole characters
|
||||
2: cmp r7,#8 // check number of remaining pixels
|
||||
bhs 5f // enough characters remain
|
||||
|
||||
// check if 1st part of last character remains
|
||||
cmp r7,#4 // check 1st part of last character
|
||||
blo 3f // all done
|
||||
|
||||
// ---- render 1st part of last character
|
||||
|
||||
RenderMText_Last:
|
||||
|
||||
// [5] load font sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
|
||||
ldrb r5,[r3,r5] // [2] load font sample -> R5
|
||||
adds r2,#1 // [1] shift pointer to source text buffer
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply font sample * 8
|
||||
add r5,lr // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert first 4 pixels (higher 4 bits)
|
||||
ldr r1,[r5,#0] // [2] load mask for higher 4 bits
|
||||
ands r1,r6 // [1] mask foreground color
|
||||
eors r1,r4 // [1] combine with background color
|
||||
stmia r0!,{r1} // [2] store first 4 pixels
|
||||
|
||||
// check if continue with next segment
|
||||
ldr r2,[sp,#4] // get base pointer to text data -> R2
|
||||
cmp r7,#4
|
||||
bhi RenderMText_OutLoop
|
||||
|
||||
// pop registers and return
|
||||
3: pop {r1-r7,pc}
|
||||
|
||||
// ---- prepare to render whole characters
|
||||
|
||||
// prepare number of whole characters to render -> R1
|
||||
5: lsrs r1,r7,#2 // shift to get number of characters*2
|
||||
lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7
|
||||
subs r5,r7 // get remaining width
|
||||
str r5,[sp,#8] // save new remaining width
|
||||
subs r1,#1 // number of characters*2 - 1
|
||||
|
||||
// ---- [22*N-1] start inner loop, render characters in one part of segment
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *number of characters to generate*2 - 1 (loop counter)
|
||||
// R2 ... *pointer to source text buffer
|
||||
// R3 ... *pointer to font line
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... font sample
|
||||
// R6 ... *foreground color (expanded to 32-bit)
|
||||
// R7 ... (temporary)
|
||||
// LR ... *pointer to conversion table
|
||||
|
||||
RenderMText_InLoop:
|
||||
|
||||
// [5] load font sample -> R5
|
||||
ldrb r5,[r2,#0] // [2] load character from source text buffer -> R5
|
||||
ldrb r5,[r3,r5] // [2] load font sample -> R5
|
||||
adds r2,#1 // [1] shift pointer to source text buffer
|
||||
|
||||
// [2] prepare conversion table -> R5
|
||||
lsls r5,#3 // [1] multiply font sample * 8
|
||||
add r5,lr // [1] add pointer to conversion table
|
||||
|
||||
// [6] convert first 4 pixels (higher 4 bits)
|
||||
ldr r7,[r5,#0] // [2] load mask for higher 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store first 4 pixels
|
||||
|
||||
// [6] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r5,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r7} // [2] store second 4 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r1,#2 // [1] shift loop counter
|
||||
bhi RenderMText_InLoop // [1,2] > 0, render next whole character
|
||||
|
||||
// ---- end inner loop, continue with last character, or start new part
|
||||
|
||||
// continue to outer loop
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
beq RenderMText_Last // render 1st half of last character
|
||||
ldr r2,[sp,#4] // get base pointer to text data -> R2
|
||||
b RenderMText_OutLoop // go back to outer loop
|
||||
|
||||
.align 2
|
||||
RenderMText_Addr:
|
||||
.word RenderTextMask
|
||||
RenderMText_pSioBase:
|
||||
.word SIO_BASE // address of SIO base
|
||||
|
|
@ -1,297 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_OSCIL
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// render font pixel mask
|
||||
.extern RenderTextMask // u32 RenderTextMask[512];
|
||||
|
||||
// extern "C" u8* RenderOscil(u8* dbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render oscilloscope graph GF_OSCIL
|
||||
// dbuf ... destination data buffer
|
||||
// x ... start X coordinate (must be multiple of 4)
|
||||
// y ... start Y coordinate
|
||||
// w ... width of this segment (must be multiple of 4)
|
||||
// segm ... video segment
|
||||
// Output new dbuf pointer.
|
||||
// 320 pixels takes 16.6 us on 151 MHz.
|
||||
|
||||
.thumb_func
|
||||
.global RenderOscil
|
||||
// RenderOscil - render one scanline of the GF_OSCIL graph into dbuf.
// For every source byte (sample) one pixel is produced: the comparison
// "pixel height >= (sample - Y)" (unsigned, Y counted from the bottom) yields
// one bit per sample; groups of bits index RenderTextMask (2 words per entry,
// 8 bytes stride) and the loaded mask selects between the two colors via
// ((mask AND (fg XOR bg)) XOR bg).
// NOTE(review): presumably each mask byte is 0xFF/0x00 per pixel - confirm
// against the RenderTextMask generator.
// R0 is advanced by every store and is left pointing behind the last written
// pixel on return; R1-R7 are restored by the final pop.
RenderOscil:
|
||||
|
||||
// push registers
|
||||
push {r1-r7,lr}
|
||||
|
||||
// Input registers and stack content:
|
||||
// R0 ... pointer to destination data buffer
|
||||
// SP+0: R1 start X coordinate
|
||||
// SP+4: R2 start Y coordinate (later: base pointer to sample data)
|
||||
// SP+8: R3 width to display
|
||||
// SP+12: R4
|
||||
// SP+16: R5
|
||||
// SP+20: R6
|
||||
// SP+24: R7
|
||||
// SP+28: LR
|
||||
// SP+32: video segment (later: wrap width in X direction)
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#32] // load video segment -> R4
|
||||
|
||||
// get wrap width -> [SP+32]
|
||||
ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width
|
||||
movs r7,#3 // mask to align to 32-bit
|
||||
bics r5,r7 // align wrap (clear low 2 bits, round down to multiple of 4)
|
||||
str r5,[sp,#32] // save wrap width (overwrites the segment pointer slot)
|
||||
|
||||
// align X coordinate to 32-bit -> R1
|
||||
bics r1,r7
|
||||
|
||||
// align remaining width -> [SP+8]
|
||||
bics r3,r7
|
||||
str r3,[sp,#8] // save new width
|
||||
|
||||
// current Y in direction from bottom to up -> R5
|
||||
ldrh r5,[r4,#SSEGM_WRAPY] // get wrap height
|
||||
subs r5,#1 // wrapy - 1
|
||||
subs r5,r2 // subtract Y, get Y relative to bottom -> R5
|
||||
|
||||
// get pixel height -> LR
|
||||
ldrb r3,[r4,#SSEGM_PAR2] // get pixel height
|
||||
mov lr,r3 // pixel height -> LR
|
||||
|
||||
// base pointer to sample data (without X) -> [SP+4], R2
|
||||
ldr r2,[r4,#SSEGM_DATA] // pointer to sample data
|
||||
str r2,[sp,#4] // save pointer to sample buffer
|
||||
|
||||
// prepare pointer to sample data with X -> R2
|
||||
add r2,r1 // pointer to source sample buffer -> R2
|
||||
|
||||
// prepare foreground color, expand to 32-bit -> R6
|
||||
ldrb r6,[r4,#SSEGM_PAR+1] // load foreground color
|
||||
lsls r3,r6,#8 // [1] shift foreground color << 8
|
||||
orrs r3,r6 // [1] color expanded to 16 bits
|
||||
lsls r6,r3,#16 // [1] shift 16-bit color << 16
|
||||
orrs r6,r3 // [1] color expanded to 32 bits
|
||||
|
||||
// prepare background color, expand to 32 bits -> R4
|
||||
ldrb r4,[r4,#SSEGM_PAR] // load background color (R4 no longer points to the segment)
|
||||
lsls r3,r4,#8 // shift background color << 8
|
||||
orrs r3,r4 // color expanded to 16 bits
|
||||
lsls r4,r3,#16 // shift 16-bit color << 16
|
||||
orrs r4,r3 // color expanded to 32 bits
|
||||
|
||||
// [1] XOR foreground and background color -> R6
|
||||
eors r6,r4 // [1] XOR foreground color with background color (R6 = fg XOR bg from here on)
|
||||
|
||||
// prepare wrap width - start X -> R7
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
subs r7,r1 // pixels remaining to end of segment
|
||||
|
||||
// last 4-pixels
|
||||
// NOTE(review): this path jumps to RenderOscil_Last without subtracting the
|
||||
// 4 rendered pixels from the remaining width in [SP+8]; the path through
|
||||
// label 5 subtracts them before reaching RenderOscil_Last. Confirm whether
|
||||
// 4 extra pixels can be written here.
|
||||
cmp r7,#4
|
||||
bhi RenderOscil_OutLoop
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
b RenderOscil_Last // render last 4-pixels of first segment
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// Outer loop variables (* prepared before outer loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... number of 4-pixels to generate in one part of segment
|
||||
// R2 ... *pointer to source sample buffer
|
||||
// R3 ... remaining width, later: (temporary)
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... *current line Y (in direction from bottom to up)
|
||||
// R6 ... *foreground color XOR background color (expanded to 32-bit)
|
||||
// R7 ... *wrap width of this segment, later: (temporary)
|
||||
// LR ... *pixel height
|
||||
// [SP+4] ... *base pointer to sample data (without X)
|
||||
// [SP+8] ... *remaining width
|
||||
// [SP+32] ... *wrap width
|
||||
|
||||
RenderOscil_OutLoop:
|
||||
|
||||
// limit wrap width by total width -> R7
|
||||
ldr r3,[sp,#8] // get remaining width
|
||||
cmp r7,r3 // compare with wrap width
|
||||
bls 2f // width is OK
|
||||
mov r7,r3 // limit wrap width
|
||||
|
||||
// check number of pixels
|
||||
2: cmp r7,#8 // check number of remaining pixels
|
||||
bhs 5f // enough pixels remain to render 8-pixels
|
||||
|
||||
// check last 4-pixels
|
||||
cmp r7,#4 // check last 4-pixels
|
||||
blo 3f // all done
|
||||
|
||||
// ---- render last 4 pixels
|
||||
|
||||
RenderOscil_Last:
|
||||
|
||||
// [1] clear sample accumulator
|
||||
movs r1,#0 // [1] clear sample accumulator (R1 in this path)
|
||||
|
||||
// [5] get sample 0
|
||||
ldrb r3,[r2,#0] // [2] get data sample -> R3
|
||||
subs r3,r5 // [1] distance from current line
|
||||
cmp lr,r3 // [1] compare with pixel height (carry set when height >= distance, unsigned)
|
||||
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
|
||||
|
||||
// [5] get sample 1
|
||||
ldrb r3,[r2,#1] // [2] get data sample -> R3
|
||||
subs r3,r5 // [1] distance from current line
|
||||
cmp lr,r3 // [1] compare with pixel height
|
||||
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
|
||||
|
||||
// [5] get sample 2
|
||||
ldrb r3,[r2,#2] // [2] get data sample -> R3
|
||||
subs r3,r5 // [1] distance from current line
|
||||
cmp lr,r3 // [1] compare with pixel height
|
||||
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
|
||||
|
||||
// [5] get sample 3
|
||||
ldrb r3,[r2,#3] // [2] get data sample -> R3
|
||||
subs r3,r5 // [1] distance from current line
|
||||
cmp lr,r3 // [1] compare with pixel height
|
||||
adcs r1,r1 // [1] shift carry of comparison to accumulator -> R1
|
||||
adds r2,#4 // [1] shift pointer to source buffer
|
||||
|
||||
// [4] prepare conversion table -> R1
|
||||
lsls r1,#3 // [1] multiply sample * 8
|
||||
ldr r3,RenderOscil_Addr // [2] get pointer to conversion table -> R3
|
||||
add r1,r3 // [1] add pointer to conversion table
|
||||
|
||||
// [7] convert 4 pixels (lower 4 bits)
|
||||
ldr r1,[r1,#4] // [2] load mask for lower 4 bits (the 4 accumulated bits sit in bits 0..3)
|
||||
ands r1,r6 // [1] mask foreground color
|
||||
eors r1,r4 // [1] combine with background color
|
||||
stmia r0!,{r1} // [3] store 4 pixels
|
||||
|
||||
// check if continue with next segment
|
||||
ldr r2,[sp,#4] // get base pointer to sample data -> R2
|
||||
cmp r7,#4
|
||||
bhi RenderOscil_OutLoop
|
||||
|
||||
// pop registers and return
|
||||
3: pop {r1-r7,pc}
|
||||
|
||||
// ---- prepare to render 8-pixels
|
||||
|
||||
// prepare number of whole 4-pixels to render -> R1
|
||||
5: lsrs r1,r7,#2 // shift width to get number of 4-pixels
|
||||
lsls r7,r1,#2 // shift back to get number of pixels, rounded down -> R7
|
||||
subs r3,r7 // get remaining width
|
||||
str r3,[sp,#8] // save new remaining width
|
||||
subs r1,#1 // number of 4-pixels - 1
|
||||
|
||||
// ---- [50*N-1] start inner loop, render in one part of segment
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *number of 4-pixels to generate - 1 (loop counter, decremented by 2 per 8-pixel step)
|
||||
// R2 ... *pointer to source sample buffer
|
||||
// R3 ... sample
|
||||
// R4 ... *background color (expanded to 32-bit)
|
||||
// R5 ... *current line Y (in direction from bottom to up)
|
||||
// R6 ... *foreground color XOR background color (expanded to 32-bit)
|
||||
// R7 ... sample accumulator, conversion table
|
||||
// LR ... *pixel height
|
||||
// [SP+4] ... *base pointer to sample data (without X)
|
||||
// [SP+8] ... *remaining width
|
||||
// [SP+32] ... *wrap width
|
||||
|
||||
RenderOscil_InLoop: // render 8 pixels in one loop step, top half of graph
|
||||
|
||||
// [1] clear sample accumulator
|
||||
movs r7,#0 // [1] clear sample accumulator
|
||||
|
||||
// [5] get sample 0
|
||||
ldrb r3,[r2,#0] // [2] get data sample -> R3
|
||||
subs r3,r5 // [1] distance from current line
|
||||
cmp lr,r3 // [1] compare with pixel height (carry set when height >= distance, unsigned)
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [5] get sample 1
|
||||
ldrb r3,[r2,#1] // [2] get data sample -> R3
|
||||
subs r3,r5 // [1] distance from current line
|
||||
cmp lr,r3 // [1] compare with pixel height
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [5] get sample 2
|
||||
ldrb r3,[r2,#2] // [2] get data sample -> R3
|
||||
subs r3,r5 // [1] distance from current line
|
||||
cmp lr,r3 // [1] compare with pixel height
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [5] get sample 3
|
||||
ldrb r3,[r2,#3] // [2] get data sample -> R3
|
||||
subs r3,r5 // [1] distance from current line
|
||||
cmp lr,r3 // [1] compare with pixel height
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [5] get sample 4
|
||||
ldrb r3,[r2,#4] // [2] get data sample -> R3
|
||||
subs r3,r5 // [1] distance from current line
|
||||
cmp lr,r3 // [1] compare with pixel height
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [5] get sample 5
|
||||
ldrb r3,[r2,#5] // [2] get data sample -> R3
|
||||
subs r3,r5 // [1] distance from current line
|
||||
cmp lr,r3 // [1] compare with pixel height
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [5] get sample 6
|
||||
ldrb r3,[r2,#6] // [2] get data sample -> R3
|
||||
subs r3,r5 // [1] distance from current line
|
||||
cmp lr,r3 // [1] compare with pixel height
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
|
||||
// [6] get sample 7
|
||||
ldrb r3,[r2,#7] // [2] get data sample -> R3
|
||||
subs r3,r5 // [1] distance from current line
|
||||
cmp lr,r3 // [1] compare with pixel height
|
||||
adcs r7,r7 // [1] shift carry of comparison to accumulator -> R7
|
||||
adds r2,#8 // [1] shift pointer to source buffer
|
||||
|
||||
// [4] prepare conversion table -> R7
|
||||
lsls r7,#3 // [1] multiply sample * 8
|
||||
ldr r3,RenderOscil_Addr // [2] get pointer to conversion table -> R3
|
||||
add r7,r3 // [1] add pointer to conversion table
|
||||
|
||||
// [4] convert first 4 pixels (higher 4 bits)
|
||||
ldr r3,[r7,#0] // [2] load mask for higher 4 bits
|
||||
ands r3,r6 // [1] mask foreground color
|
||||
eors r3,r4 // [1] combine with background color
|
||||
|
||||
// [7] convert second 4 pixels (lower 4 bits)
|
||||
ldr r7,[r7,#4] // [2] load mask for lower 4 bits
|
||||
ands r7,r6 // [1] mask foreground color
|
||||
eors r7,r4 // [1] combine with background color
|
||||
stmia r0!,{r3,r7} // [3] store all 8 pixels (first 4 in R3, second 4 in R7)
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r1,#2 // [1] shift loop counter
|
||||
bhi RenderOscil_InLoop // [1,2] > 0, render next whole 8-pixels
|
||||
|
||||
// ---- end inner loop, continue with last 4-pixels, or start new part
|
||||
|
||||
// continue to outer loop
|
||||
// The ldr below does not change the flags, so the beq still tests the
|
||||
// result of "subs r1,#2": zero means one 4-pixel group is left over.
|
||||
// Label 8 is not referenced anywhere inside this routine.
|
||||
ldr r7,[sp,#32] // load wrap width
|
||||
8: beq RenderOscil_Last // render last 4-pixels
|
||||
ldr r2,[sp,#4] // get base pointer to sample data -> R2
|
||||
b RenderOscil_OutLoop // go back to outer loop
|
||||
|
||||
.align 2
|
||||
// literal: address of the conversion table, loaded PC-relative above
RenderOscil_Addr:
|
||||
.word RenderTextMask
|
||||
|
|
@ -1,190 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_OSCLINE
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// render font pixel mask
|
||||
.extern RenderTextMask // u32 RenderTextMask[512];
|
||||
|
||||
// extern "C" u8* RenderOscLine(u8* dbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render oscilloscope graph GF_OSCLINE
|
||||
// dbuf ... destination data buffer
|
||||
// x ... start X coordinate (must be multiple of 4)
|
||||
// y ... start Y coordinate
|
||||
// w ... width of this segment (must be multiple of 4)
|
||||
// segm ... video segment
|
||||
// Output new dbuf pointer.
|
||||
// 320 pixels takes 21.5 us on 151 MHz.
|
||||
|
||||
.thumb_func
|
||||
.global RenderOscLine
|
||||
// RenderOscLine - render one scanline of the GF_OSCLINE graph into dbuf.
// X, width and wrap width are halved on entry, so all counters below are in
// samples; each sample is written as one 16-bit store (2 pixels of the same
// color). A sample gets the foreground color when it equals the current line,
// or when the current line lies strictly between the previous sample and the
// current sample; otherwise it gets the background color.
// R0 is advanced by 2 per sample and is left pointing behind the last written
// pixel on return; R2-R7 are restored by the final pop, R1 is not preserved.
RenderOscLine:
|
||||
|
||||
// push registers
|
||||
push {r2-r7,lr}
|
||||
|
||||
// Input registers and stack content:
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate
|
||||
// SP+0: R2 start Y coordinate (later: base pointer to sample data)
|
||||
// SP+4: R3 width to display
|
||||
// SP+8: R4
|
||||
// SP+12: R5
|
||||
// SP+16: R6
|
||||
// SP+20: R7
|
||||
// SP+24: LR
|
||||
// SP+28: video segment (later: wrap width in X direction)
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#28] // load video segment -> R4
|
||||
|
||||
// get wrap width/2 -> [SP+28]
|
||||
ldrh r5,[r4,#SSEGM_WRAPX] // get wrap width
|
||||
lsrs r5,#1 // wrap width / 2
|
||||
str r5,[sp,#28] // save wrap width (overwrites the segment pointer slot)
|
||||
|
||||
// X coordinate/2 -> R1
|
||||
lsrs r1,#1
|
||||
|
||||
// remaining width/2 -> [SP+4]
|
||||
lsrs r3,#1
|
||||
str r3,[sp,#4] // save new width
|
||||
|
||||
// current Y in direction from bottom to up -> LR
|
||||
ldrh r5,[r4,#SSEGM_WRAPY] // get wrap height
|
||||
subs r5,#1 // wrapy - 1
|
||||
subs r5,r2 // subtract Y, get Y relative to bottom -> R5
|
||||
mov lr,r5 // Y relative to bottom -> LR
|
||||
|
||||
// base pointer to sample data (without X) -> [SP+0], R2
|
||||
ldr r2,[r4,#SSEGM_DATA] // pointer to sample data
|
||||
str r2,[sp,#0] // save pointer to sample buffer
|
||||
|
||||
// prepare pointer to sample data with X -> R2
|
||||
add r2,r1 // pointer to source sample buffer -> R2
|
||||
|
||||
// prepare foreground color -> R6
|
||||
ldrb r6,[r4,#SSEGM_PAR+1] // load foreground color
|
||||
lsls r7,r6,#8 // color byte << 8
|
||||
orrs r6,r7 // color duplicated into 16 bits (2 pixels)
|
||||
|
||||
// prepare background color -> R4
|
||||
ldrb r4,[r4,#SSEGM_PAR] // load background color (R4 no longer points to the segment)
|
||||
lsls r7,r4,#8 // color byte << 8
|
||||
orrs r4,r7 // color duplicated into 16 bits (2 pixels)
|
||||
|
||||
// prepare wrap width - start X -> R1
|
||||
ldr r7,[sp,#28] // load wrap width
|
||||
subs r1,r7,r1 // pixels remaining to end of segment
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// Outer loop variables (* prepared before outer loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *wrap width of this segment, later: number of pixels to generate in one part of segment
|
||||
// R2 ... *pointer to source sample buffer
|
||||
// R3 ... remaining width, later: (temporary)
|
||||
// R4 ... *background color
|
||||
// R5 ... (temporary)
|
||||
// R6 ... *foreground color
|
||||
// R7 ... (temporary)
|
||||
// LR ... *current line Y (in direction from bottom to up)
|
||||
// [SP+0] ... *base pointer to sample data (without X)
|
||||
// [SP+4] ... *remaining width
|
||||
// [SP+28] ... *wrap width
|
||||
|
||||
RenderOscLine_OutLoop:
|
||||
|
||||
// limit wrap width by total width -> R1
|
||||
ldr r3,[sp,#4] // get remaining width
|
||||
cmp r1,r3 // compare with wrap width
|
||||
bls 2f // width is OK
|
||||
mov r1,r3 // limit wrap width
|
||||
|
||||
// check number of pixels
|
||||
2: cmp r1,#0 // check number of remaining pixels
|
||||
beq RenderOscLine_Stop // stop
|
||||
subs r3,r1 // get remaining width
|
||||
str r3,[sp,#4] // save new remaining width
|
||||
|
||||
// ---- start inner loop, render in one part of segment
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *number of pixels to generate (loop counter)
|
||||
// R2 ... *pointer to source sample buffer
|
||||
// R3 ... sample
|
||||
// R4 ... *background color
|
||||
// R5 ... previous sample
|
||||
// R6 ... *foreground color
|
||||
// R7 ... current color
|
||||
// LR ... *current line Y (in direction from bottom to up)
|
||||
// [SP+0] ... *base pointer to sample data (without X)
|
||||
// [SP+4] ... *remaining width
|
||||
// [SP+28] ... *wrap width
|
||||
|
||||
ldrb r5,[r2,#0] // [2] prepare previous sample -> R5 (first sample of the part is its own predecessor)
|
||||
|
||||
RenderOscLine_InLoop: // render one sample (one 16-bit store = 2 pixels) per loop step
|
||||
|
||||
// [3] get sample
|
||||
ldrb r3,[r2,#0] // [2] get data sample -> R3
|
||||
adds r2,#1 // [1] increment pointer
|
||||
|
||||
// [1] preset to background color
|
||||
mov r7,r4 // [1] preset to background color
|
||||
|
||||
// [3..8] (sample > previous sample) AND (sample > line) AND (line > previous sample) - display pixel
|
||||
cmp r3,lr // [1] compare sample with line
|
||||
beq 4f // [1,2] (sample == line), true, display pixel everytime
|
||||
blo 2f // [1,2] (sample < line), false
|
||||
cmp r3,r5 // [1] compare sample with previous sample
|
||||
bls 2f // [1,2] (sample <= previous), false
|
||||
cmp lr,r5 // [1] compare line with previous sample
|
||||
bhi 4f // [1,2] (line > previous), true
|
||||
|
||||
// [3..7] (sample < previous sample) AND (sample < line) AND (line < previous sample) - display pixel
|
||||
2: cmp r3,r5 // [1] compare sample with previous sample
|
||||
bhs 6f // [1,2] (sample >= previous), false
|
||||
cmp r3,lr // [1] compare sample with line
|
||||
bhs 6f // [1,2] (sample >= line), false
|
||||
cmp lr,r5 // [1] compare line with previous sample
|
||||
bhs 6f // [1,2] (line >= previous), false
|
||||
|
||||
// [1] use foreground color
|
||||
4: mov r7,r6 // [1] use foreground color
|
||||
|
||||
// [3] write 2 pixels
|
||||
6: strh r7,[r0,#0] // [2] write pixel
|
||||
adds r0,#2 // [1] increment pointer
|
||||
|
||||
// [1] save previous sample
|
||||
mov r5,r3 // [1]
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r1,#1 // [1] shift loop counter
|
||||
bne RenderOscLine_InLoop // [1,2] render next pixel
|
||||
|
||||
// ---- end inner loop, start new part
|
||||
|
||||
// continue to outer loop
|
||||
ldr r1,[sp,#28] // load wrap width
|
||||
ldr r2,[sp,#0] // get base pointer to sample data -> R2
|
||||
b RenderOscLine_OutLoop // go back to outer loop
|
||||
|
||||
RenderOscLine_Stop:
|
||||
|
||||
// pop registers and return
|
||||
pop {r2-r7,pc}
|
||||
|
||||
.align 2
|
||||
// NOTE(review): this literal is not referenced anywhere inside RenderOscLine
RenderOscLine_Addr:
|
||||
.word RenderTextMask
|
||||
|
|
@ -1,360 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render LAYERMODE_PERSP*
|
||||
//
|
||||
// ****************************************************************************
|
||||
// img ... (const u8*) SLAYER_IMG image data
|
||||
// par ... (const void*) SLAYER_PAR pointer to 6 matrix integer parameters m11,m12..m23
|
||||
// horiz ... (s8) SLAYER_HORIZ horizon offset/4 (0=no perspective, <0 ceiling)
|
||||
// xbits ... (u8) SLAYER_XBITS number of bits of image width
|
||||
// ybits ... (u8) SLAYER_YBITS number of bits of image height
|
||||
// w ... (u16) SLAYER_W destination width
|
||||
// h ... (u16) SLAYER_H destination height
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
// Byte offsets of the interpolator registers, used below as immediate offsets
// from the interpolator base pointer (loaded from RenderPersp_Interp).
// NOTE(review): presumably these mirror the RP2040 SIO INTERP register layout -
// confirm against hardware/regs/sio.h.
#define ACCUM0_OFFSET 0
|
||||
#define ACCUM1_OFFSET 4
|
||||
#define BASE0_OFFSET 8
|
||||
#define BASE1_OFFSET 12
|
||||
#define BASE2_OFFSET 16
|
||||
#define POP_LANE0_OFFSET 20
|
||||
#define POP_LANE1_OFFSET 24
|
||||
#define POP_FULL_OFFSET 28
|
||||
#define PEEK_LANE0_OFFSET 32
|
||||
#define PEEK_LANE1_OFFSET 36
|
||||
#define PEEK_FULL_OFFSET 40
|
||||
#define CTRL_LANE0_OFFSET 44
|
||||
#define CTRL_LANE1_OFFSET 48
|
||||
#define ACCUM0_ADD_OFFSET 52
|
||||
#define ACCUM1_ADD_OFFSET 56
|
||||
#define BASE_1AND0_OFFSET 60
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" void RenderPersp(u8* dbuf, int y, sLayer* scr)
|
||||
|
||||
// render layers with transformation matrix LAYERMODE_PERSP*
|
||||
// R0 ... dbuf pointer to data buffer
|
||||
// R1 ... y coordinate of scanline (relative in destination image)
|
||||
// R2 ... scr pointer to layer screen structure sLayer
|
||||
|
||||
.thumb_func
|
||||
.global RenderPersp
|
||||
RenderPersp:
|
||||
|
||||
// push registers
|
||||
push {r4-r7,lr}
|
||||
|
||||
// Stack content and input variables:
|
||||
// R0 dbuf pointer to data buffer
|
||||
// R1 Y coordinate of scanline
|
||||
// R2 scr pointer to layer screen structure sLayer
|
||||
// R3
|
||||
// SP+0: R4
|
||||
// SP+4: R5
|
||||
// SP+8: R6
|
||||
// SP+12: R7
|
||||
// SP+16: LR
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... Y coordinate
|
||||
// R2 ... sLayer
|
||||
|
||||
// load horizon offset -> R4, check if use perspective
|
||||
ldr r6,RenderPersp_pSioBase // get address of SIO base -> R6
|
||||
ldrh r5,[r2,#SLAYER_H] // get destination height -> R5
|
||||
ldrb r4,[r2,#SLAYER_HORIZ] // get horizon offset -> R4
|
||||
sxtb r4,r4 // signed extension
|
||||
lsls r4,#2 // horizon * 4, horizon = 0 ?
|
||||
bne 2f // use perspective
|
||||
|
||||
// not using perspective, start Y coordinate y0 = y - h/2 -> R12
|
||||
lsrs r5,#1 // destination height/2 -> R5
|
||||
subs r1,r5 // y - h/2 -> R1
|
||||
mov r12,r1 // current coordinate Y0 = y - h/2 -> R12
|
||||
|
||||
// prepare divide result to get 1<<FRACT
|
||||
movs r5,#1 // R5 <- 1
|
||||
str r5,[r6,#SIO_DIV_UDIVISOR_OFFSET] // divisor = 1
|
||||
lsls r5,#FRACT // constant 1<<FRACT -> R5
|
||||
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL
|
||||
b 4f
|
||||
|
||||
// using perspective, check ceiling mode
|
||||
2: bpl 3f // horizon is not negative
|
||||
subs r1,r5,r1 // negate, y = h - y
|
||||
subs r1,#1 // y = h - 1 - y
|
||||
negs r4,r4 // absolute value of horizon
|
||||
|
||||
// prepare current coordinate Y0 = y - h -> R12
|
||||
3: subs r7,r1,r5 // y - h = current Y coordinate -> R7
|
||||
mov r12,r7 // store current coordinate Y0 -> R12
|
||||
|
||||
// start calculating distance coefficient dist = FRACTMUL*h/(y + horiz)
|
||||
lsls r5,#FRACT // destination height * FRACTMUL -> R5
|
||||
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h
|
||||
adds r1,r4 // horizon + y -> R1
|
||||
str r1,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R2 ... sLayer
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// prepare start coordinate X0 = -w/2 -> LR
|
||||
4: ldrh r3,[r2,#SLAYER_W] // get destination width -> R3
|
||||
lsrs r5,r3,#1 // width/2
|
||||
negs r5,r5 // negate
|
||||
mov lr,r5 // store start coordinate X0 -> LR
|
||||
|
||||
// prepare number of 4-pixels (loop counter) -> R7
|
||||
lsrs r7,r3,#2 // width/4 -> R7
|
||||
|
||||
// get number of bits of image width "xbits" -> R1
|
||||
ldrb r1,[r2,#SLAYER_XBITS] // number of bits of image width -> R1
|
||||
|
||||
// get number of bits of image height "ybits" -> R4
|
||||
ldrb r4,[r2,#SLAYER_YBITS] // number of bits of image height -> R4
|
||||
|
||||
// prepare address of interpolator base -> R3
|
||||
ldr r3,RenderPersp_Interp // get address of interpolator base -> R3
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... number of bits of image width xbits
|
||||
// R2 ... sLayer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... number of bits of image height ybits
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// ---- setup interpolator
|
||||
|
||||
// set image base to base2
|
||||
ldr r6,[r2,#SLAYER_IMG] // load image base
|
||||
str r6,[r3,#BASE2_OFFSET] // set image base
|
||||
|
||||
// set control word of lane 1 - add raw lane base back to accumulator, shift "FRACT-xbits", mask xbits...xbits+ybits-1
|
||||
ldr r6,RenderPersp_Ctrl // load control word
|
||||
subs r6,r1 // FRACT - xbits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required)
|
||||
lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift xbits to mask LSB position -> R5
|
||||
orrs r6,r5 // add xbits to control word
|
||||
subs r1,#1 // xbits - 1 -> R1
|
||||
adds r5,r1,r4 // xbits-1+ybits -> R5
|
||||
lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to MSB mask position
|
||||
orrs r6,r5 // add to control word
|
||||
str r6,[r3,#CTRL_LANE1_OFFSET] // set control word of lane 1
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... image width xbits-1
|
||||
// R2 ... sLayer
|
||||
// R3 ... interpolator base
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// set control word of lane 0 - add raw lane base back to accumulator, shift "FRACT", mask 0..xbits-1
|
||||
ldr r6,RenderPersp_Ctrl // load control word
|
||||
lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift xbits-1 to mask MSB position
|
||||
orrs r6,r1 // add to control word
|
||||
str r6,[r3,#CTRL_LANE0_OFFSET] // set control word of lane 0
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R2 ... sLayer
|
||||
// R3 ... interpolator base
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// ---- set matrix
|
||||
|
||||
// get pointer to matrix -> R4
|
||||
ldr r4,[r2,#SLAYER_PAR] // get pointer to matrix -> R4
|
||||
|
||||
// get distance coefficient dist -> R1
|
||||
ldr r1,RenderPersp_pSioBase // get address of SIO base -> R1
|
||||
ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient
|
||||
|
||||
// r4+0 ... m11
|
||||
// r4+4 ... m12
|
||||
// r4+8 ... m13
|
||||
// r4+12 ... m21
|
||||
// r4+16 ... m22
|
||||
// r4+20 ... m23
|
||||
|
||||
// set m11 -> R5 base0
|
||||
ldr r5,[r4,#0] // load m11
|
||||
muls r5,r1 // m11*dist
|
||||
asrs r5,#FRACT // (m11*dist)>>FRACT
|
||||
str r5,[r3,#BASE0_OFFSET] // set base0
|
||||
|
||||
// set m21 -> R6 base1
|
||||
ldr r6,[r4,#12] // load m21
|
||||
muls r6,r1 // m21*dist
|
||||
asrs r6,#FRACT // (m21*dist)>>FRACT
|
||||
str r6,[r3,#BASE1_OFFSET] // set base1
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... distance coefficient
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R5 ... m11
|
||||
// R6 ... m21
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// set x0*m11 + y0*m12 + m13 -> accum0
|
||||
mov r2,lr // start coordinate X0 -> R2
|
||||
muls r5,r2 // x0*m11 -> R5
|
||||
muls r2,r6 // x0*m21 -> R2
|
||||
mov lr,r1 // save distance coefficient -> LR
|
||||
ldr r6,[r4,#4] // load m12 -> R6
|
||||
muls r1,r6 // m12*dist -> R1
|
||||
asrs r1,#FRACT // (m12*dist)>>FRACT -> R1
|
||||
mov r6,r12 // load coordinate Y0 -> R6
|
||||
muls r1,r6 // y0*m12 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 -> R5
|
||||
ldr r1,[r4,#8] // load m13 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
|
||||
str r5,[r3,#ACCUM0_OFFSET] // set accum0
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R2 ... x0*m21
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R6 ... current coordinate Y0
|
||||
// R7 ... width/4
|
||||
// LR ... distance coefficient
|
||||
|
||||
// set x0*m21 + y0*m22 + m23 -> accum1
|
||||
ldr r1,[r4,#16] // load m22 -> R1
|
||||
mov r5,lr // distance coefficient -> R5
|
||||
muls r1,r5 // m22*dist
|
||||
asrs r1,#FRACT // (m22*dist)>>FRACT -> R1
|
||||
muls r1,r6 // y0*m22 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 -> R2
|
||||
ldr r1,[r4,#20] // load m23 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
|
||||
str r2,[r3,#ACCUM1_OFFSET] // set accum1
|
||||
|
||||
// ---- process odd 4-pixel
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to pixel)
|
||||
// R5 ... (temporary - load pixel)
|
||||
// R6 ... (temporary - pixel accumulator)
|
||||
// R7 ... width/4 (loop counter)
|
||||
|
||||
lsrs r7,#1 // width/4/2
|
||||
bcc 2f // no odd 4-pixel
|
||||
|
||||
// [3] load 1st pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r6,[r4,#0] // [2] load pixel
|
||||
|
||||
// [5] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r5,[r4,#0] // [2] load pixel
|
||||
lsls r5,#8 // [1] shift 1 byte left
|
||||
orrs r6,r5 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 3rd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r5,[r4,#0] // [2] load pixel
|
||||
lsls r5,#16 // [1] shift 2 bytes left
|
||||
orrs r6,r5 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 4th pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r5,[r4,#0] // [2] load pixel
|
||||
lsls r5,#24 // [1] shift 3 bytes left
|
||||
orrs r6,r5 // [1] add pixel to accumulator
|
||||
|
||||
// [2] store 4 pixels
|
||||
stmia r0!,{r6} // [2] store 4 pixels
|
||||
|
||||
// check number of remaining pixels
|
||||
2: tst r7,r7 // check number of pixels
|
||||
beq 8f // end
|
||||
|
||||
// ---- [42 per 8 pixels] inner loop
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... (temporary - pixel accumulator 1)
|
||||
// R2 ... (temporary - pixel accumulator 2)
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to pixel, load pixel)
|
||||
// R7 ... width/8 (loop counter)
|
||||
|
||||
// [3] load 1st pixel
|
||||
6: ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r1,[r4,#0] // [2] load pixel
|
||||
|
||||
// [5] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 3rd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 4th pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#24 // [1] shift 3 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [3] load 1st pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r2,[r4,#0] // [2] load pixel
|
||||
|
||||
// [5] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 3rd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 4th pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#24 // [1] shift 3 bytes left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [3] store 8 pixels
|
||||
stmia r0!,{r1,r2} // [3] store 8 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r7,#1 // [1] 8-pixel counter
|
||||
bne 6b // [1,2] next 8-pixels
|
||||
|
||||
// pop registers
|
||||
8: pop {r4-r7,pc}
|
||||
|
||||
.align 2
|
||||
// literal pool: address of the SIO block (loaded PC-relative by the render code)
|
||||
RenderPersp_pSioBase:
|
||||
.word SIO_BASE // address of SIO base
|
||||
|
||||
// literal pool: address of the first register (ACCUM0) of interpolator 1
|
||||
RenderPersp_Interp:
|
||||
.word SIO_BASE+SIO_INTERP1_ACCUM0_OFFSET // address of interpolator base
|
||||
|
||||
RenderPersp_Ctrl: // lane control word template: ADD_RAW set, shift field = FRACT
|
||||
.word SIO_INTERP1_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP1_CTRL_LANE0_SHIFT_LSB)
|
||||
|
|
@ -1,337 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render LAYERMODE_PERSP2*
|
||||
//
|
||||
// ****************************************************************************
|
||||
// img ... (const u8*) SLAYER_IMG image data
|
||||
// par ... (const void*) SLAYER_PAR pointer to 6 matrix integer parameters m11,m12..m23
|
||||
// horiz ... (s8) SLAYER_HORIZ horizon offset/4 (0=no perspective, <0 ceiling)
|
||||
// xbits ... (u8) SLAYER_XBITS number of bits of image width
|
||||
// ybits ... (u8) SLAYER_YBITS number of bits of image height
|
||||
// w ... (u16) SLAYER_W destination width
|
||||
// h ... (u16) SLAYER_H destination height
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
#define ACCUM0_OFFSET 0
|
||||
#define ACCUM1_OFFSET 4
|
||||
#define BASE0_OFFSET 8
|
||||
#define BASE1_OFFSET 12
|
||||
#define BASE2_OFFSET 16
|
||||
#define POP_LANE0_OFFSET 20
|
||||
#define POP_LANE1_OFFSET 24
|
||||
#define POP_FULL_OFFSET 28
|
||||
#define PEEK_LANE0_OFFSET 32
|
||||
#define PEEK_LANE1_OFFSET 36
|
||||
#define PEEK_FULL_OFFSET 40
|
||||
#define CTRL_LANE0_OFFSET 44
|
||||
#define CTRL_LANE1_OFFSET 48
|
||||
#define ACCUM0_ADD_OFFSET 52
|
||||
#define ACCUM1_ADD_OFFSET 56
|
||||
#define BASE_1AND0_OFFSET 60
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" void RenderPersp2(u8* dbuf, int y, sLayer* scr)
|
||||
|
||||
// render layers with transformation matrix LAYERMODE_PERSP2*, double pixel
|
||||
// R0 ... dbuf pointer to data buffer
|
||||
// R1 ... y coordinate of scanline (relative in destination image)
|
||||
// R2 ... scr pointer to layer screen structure sLayer
|
||||
|
||||
.thumb_func
|
||||
.global RenderPersp2
|
||||
RenderPersp2:
// Renders one scanline of a matrix-transformed layer with doubled pixels:
// programs SIO interpolator 1 (lane controls, BASE0..2, ACCUM0/1) from the
// sLayer fields and the 6-word matrix, then repeatedly reads a source pixel
// address from POP_FULL, loads the byte and writes it twice into dbuf.
// The hardware divider supplies the per-line distance coefficient.
// R0 (dbuf), R1-R3 and R12 are clobbered; R4-R7 are restored on return.
|
||||
|
||||
// push registers
|
||||
push {r4-r7,lr}
|
||||
|
||||
// Stack content and input variables:
|
||||
// R0 dbuf pointer to data buffer
|
||||
// R1 Y coordinate of scanline
|
||||
// R2 scr pointer to layer screen structure sLayer
|
||||
// R3
|
||||
// SP+0: R4
|
||||
// SP+4: R5
|
||||
// SP+8: R6
|
||||
// SP+12: R7
|
||||
// SP+16: LR
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... Y coordinate
|
||||
// R2 ... sLayer
|
||||
|
||||
// load horizon offset -> R4, check if use perspective
|
||||
ldr r6,RenderPersp_pSioBase // get address of SIO base -> R6
|
||||
ldrh r5,[r2,#SLAYER_H] // get destination height -> R5
|
||||
ldrb r4,[r2,#SLAYER_HORIZ] // get horizon offset -> R4
|
||||
sxtb r4,r4 // signed extension
|
||||
lsls r4,#2 // horizon * 4; sets Z and N flags tested by BNE here and BPL at label 2
|
||||
bne 2f // use perspective
|
||||
|
||||
// not using perspective, start Y coordinate y0 = y - h/2 -> R12
|
||||
lsrs r5,#1 // destination height/2 -> R5
|
||||
subs r1,r5 // y - h/2 -> R1
|
||||
mov r12,r1 // current coordinate Y0 = y - h/2 -> R12
|
||||
|
||||
// prepare divide result to get 1<<FRACT
|
||||
movs r5,#1 // R5 <- 1
|
||||
str r5,[r6,#SIO_DIV_UDIVISOR_OFFSET] // divisor = 1
|
||||
lsls r5,#FRACT // constant 1<<FRACT -> R5
|
||||
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL
|
||||
b 4f
|
||||
|
||||
// using perspective, check ceiling mode
|
||||
2: bpl 3f // horizon is not negative (N flag still from the LSLS above)
|
||||
subs r1,r5,r1 // negate, y = h - y
|
||||
subs r1,#1 // y = h - 1 - y
|
||||
negs r4,r4 // absolute value of horizon
|
||||
|
||||
// prepare current coordinate Y0 = y - h -> R12
|
||||
3: subs r7,r1,r5 // y - h = current Y coordinate -> R7
|
||||
mov r12,r7 // store current coordinate Y0 -> R12
|
||||
|
||||
// start calculating distance coefficient dist = FRACTMUL*h/(y + horiz)
|
||||
lsls r5,#FRACT // destination height * FRACTMUL -> R5
|
||||
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h
|
||||
adds r1,r4 // horizon + y -> R1
|
||||
str r1,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R2 ... sLayer
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// prepare start coordinate X0 = -w/2 -> LR
|
||||
4: ldrh r3,[r2,#SLAYER_W] // get destination width -> R3
|
||||
lsrs r5,r3,#1 // width/2
|
||||
negs r5,r5 // negate
|
||||
mov lr,r5 // store start coordinate X0 -> LR
|
||||
|
||||
// prepare number of 4-pixels (loop counter) -> R7
|
||||
lsrs r7,r3,#2 // width/4 -> R7
|
||||
|
||||
// get number of bits of image width "xbits" -> R1
|
||||
ldrb r1,[r2,#SLAYER_XBITS] // number of bits of image width -> R1
|
||||
|
||||
// get number of bits of image height "ybits" -> R4
|
||||
ldrb r4,[r2,#SLAYER_YBITS] // number of bits of image height -> R4
|
||||
|
||||
// prepare address of interpolator base -> R3
|
||||
ldr r3,RenderPersp_Interp // get address of interpolator base -> R3
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... number of bits of image width xbits
|
||||
// R2 ... sLayer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... number of bits of image height ybits
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// ---- setup interpolator
|
||||
|
||||
// set image base to base2
|
||||
ldr r6,[r2,#SLAYER_IMG] // load image base
|
||||
str r6,[r3,#BASE2_OFFSET] // set image base
|
||||
|
||||
// set control word of lane 1 - add raw lane base back to accumulator, shift "FRACT-xbits", mask xbits...xbits+ybits-1
|
||||
ldr r6,RenderPersp_Ctrl // load control word
|
||||
subs r6,r1 // FRACT - xbits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required)
|
||||
lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift xbits to mask LSB position -> R5
|
||||
orrs r6,r5 // add xbits to control word
|
||||
subs r1,#1 // xbits - 1 -> R1
|
||||
adds r5,r1,r4 // xbits-1+ybits -> R5
|
||||
lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to MSB mask position
|
||||
orrs r6,r5 // add to control word
|
||||
str r6,[r3,#CTRL_LANE1_OFFSET] // set control word of lane 1
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... image width xbits-1
|
||||
// R2 ... sLayer
|
||||
// R3 ... interpolator base
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// set control word of lane 0 - add raw lane base back to accumulator, shift "FRACT", mask 0..xbits-1
|
||||
ldr r6,RenderPersp_Ctrl // load control word
|
||||
lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift xbits-1 to mask MSB position
|
||||
orrs r6,r1 // add to control word
|
||||
str r6,[r3,#CTRL_LANE0_OFFSET] // set control word of lane 0
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R2 ... sLayer
|
||||
// R3 ... interpolator base
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// ---- set matrix
|
||||
|
||||
// get pointer to matrix -> R4
|
||||
ldr r4,[r2,#SLAYER_PAR] // get pointer to matrix -> R4
|
||||
|
||||
// get distance coefficient dist -> R1
|
||||
// NOTE(review): the quotient is read without an explicit wait; presumably the
|
||||
// instructions executed since the operand writes cover the divider latency - confirm
|
||||
ldr r1,RenderPersp_pSioBase // get address of SIO base -> R1
|
||||
ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient
|
||||
|
||||
// r4+0 ... m11
|
||||
// r4+4 ... m12
|
||||
// r4+8 ... m13
|
||||
// r4+12 ... m21
|
||||
// r4+16 ... m22
|
||||
// r4+20 ... m23
|
||||
|
||||
// set m11 -> R5 base0
|
||||
ldr r5,[r4,#0] // load m11
|
||||
muls r5,r1 // m11*dist
|
||||
asrs r5,#FRACT-1 // (m11*dist)>>(FRACT-1) ... 2*delta
|
||||
str r5,[r3,#BASE0_OFFSET] // set base0
|
||||
asrs r5,#1 // (m11*dist)>>FRACT
|
||||
|
||||
// set m21 -> R6 base1
|
||||
ldr r6,[r4,#12] // load m21
|
||||
muls r6,r1 // m21*dist
|
||||
asrs r6,#FRACT-1 // (m21*dist)>>(FRACT-1) ... 2*delta
|
||||
str r6,[r3,#BASE1_OFFSET] // set base1
|
||||
asrs r6,#1 // (m21*dist)>>FRACT
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... distance coefficient
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R5 ... m11
|
||||
// R6 ... m21
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// set x0*m11 + y0*m12 + m13 -> accum0
|
||||
mov r2,lr // start coordinate X0 -> R2
|
||||
muls r5,r2 // x0*m11 -> R5
|
||||
muls r2,r6 // x0*m21 -> R2
|
||||
mov lr,r1 // save distance coefficient -> LR
|
||||
ldr r6,[r4,#4] // load m12 -> R6
|
||||
muls r1,r6 // m12*dist -> R1
|
||||
asrs r1,#FRACT // (m12*dist)>>FRACT -> R1
|
||||
mov r6,r12 // load coordinate Y0 -> R6
|
||||
muls r1,r6 // y0*m12 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 -> R5
|
||||
ldr r1,[r4,#8] // load m13 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
|
||||
str r5,[r3,#ACCUM0_OFFSET] // set accum0
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R2 ... x0*m21
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R6 ... current coordinate Y0
|
||||
// R7 ... width/4
|
||||
// LR ... distance coefficient
|
||||
|
||||
// set x0*m21 + y0*m22 + m23 -> accum1
|
||||
ldr r1,[r4,#16] // load m22 -> R1
|
||||
mov r5,lr // distance coefficient -> R5
|
||||
muls r1,r5 // m22*dist
|
||||
asrs r1,#FRACT // (m22*dist)>>FRACT -> R1
|
||||
muls r1,r6 // y0*m22 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 -> R2
|
||||
ldr r1,[r4,#20] // load m23 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
|
||||
str r2,[r3,#ACCUM1_OFFSET] // set accum1
|
||||
|
||||
// ---- process odd 4-pixel
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... (temporary - pixel accumulator 1)
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to pixel, load pixel)
|
||||
// R7 ... width/4 (loop counter)
|
||||
|
||||
lsrs r7,#1 // width/4/2, carry = dropped low bit (set if an odd 4-pixel group exists)
|
||||
bcc 2f // no odd 4-pixel
|
||||
|
||||
// [5] load 1st pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r1,[r4,#0] // [2] load pixel
|
||||
lsls r4,r1,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [7] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [2] store 4 pixels
|
||||
stmia r0!,{r1} // [2] store 4 pixels
|
||||
|
||||
// check number of remaining pixels
|
||||
2: tst r7,r7 // check number of pixels
|
||||
beq 8f // end
|
||||
|
||||
// ---- [30 per 8 pixels] inner loop
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... (temporary - pixel accumulator 1)
|
||||
// R2 ... (temporary - pixel accumulator 2)
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to pixel, load pixel)
|
||||
// R7 ... width/8 (loop counter)
|
||||
|
||||
// [5] load 1st pixel
|
||||
6: ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r1,[r4,#0] // [2] load pixel
|
||||
lsls r4,r1,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [7] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [5] load 3rd pixel (first pixel of accumulator 2)
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r2,[r4,#0] // [2] load pixel
|
||||
lsls r4,r2,#8 // [1] shift 1 byte left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [7] load 4th pixel (second pixel of accumulator 2)
|
||||
ldr r4,[r3,#POP_FULL_OFFSET] // [1] get new value
|
||||
ldrb r4,[r4,#0] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [3] store 8 pixels
|
||||
stmia r0!,{r1,r2} // [3] store 8 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r7,#1 // [1] 8-pixel counter
|
||||
bne 6b // [1,2] next 8-pixels
|
||||
|
||||
// pop registers
|
||||
8: pop {r4-r7,pc}
|
||||
|
||||
.align 2
|
||||
// literal pool: address of the SIO block (used for the hardware divider registers)
|
||||
RenderPersp_pSioBase:
|
||||
.word SIO_BASE // address of SIO base
|
||||
|
||||
// literal pool: address of the first register (ACCUM0) of interpolator 1
|
||||
RenderPersp_Interp:
|
||||
.word SIO_BASE+SIO_INTERP1_ACCUM0_OFFSET // address of interpolator base
|
||||
|
||||
RenderPersp_Ctrl: // lane control word template: ADD_RAW set, shift field = FRACT; mask fields are OR-ed in by RenderPersp2
|
||||
.word SIO_INTERP1_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP1_CTRL_LANE0_SHIFT_LSB)
|
||||
|
|
@ -1,251 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_PLANE2
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" u8* RenderPlane2(u8* dbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render 2-bit palette graphics stored in two bit planes GF_PLANE2
|
||||
// R0 ... destination data buffer
|
||||
// R1 ... start X coordinate (must be multiple of 4)
|
||||
// R2 ... start Y coordinate
|
||||
// R3 ... width of this segment (must be multiple of 4)
|
||||
// segm ... video segment
|
||||
// Output new dbuf pointer.
|
||||
// 320 pixels takes 7.3 us on 151 MHz.
|
||||
|
||||
.thumb_func
|
||||
.global RenderPlane2
|
||||
RenderPlane2:
// Renders a run of pixels from a two-plane image: one byte is read from each
// plane (second plane at offset SSEGM_PAR from the first), the two high nibbles
// and the two low nibbles each form an index*4 into the 32-bit translation
// table (SSEGM_PAR2), and each table word written to dbuf holds 4 pixels.
// X wraps to the start of the row when it reaches the 4-aligned wrap width.
// R0 is advanced past the written pixels; R3-R7 are restored on return.
|
||||
|
||||
// push registers
|
||||
push {r3-r7,lr}
|
||||
|
||||
// Input registers and stack content:
|
||||
// R0 ... destination data buffer
|
||||
// R1 ... start X coordinate
|
||||
// R2 ... start Y coordinate
|
||||
// SP+0: R3 width to display (remaining width)
|
||||
// SP+4: R4
|
||||
// SP+8: R5
|
||||
// SP+12: R6
|
||||
// SP+16: R7
|
||||
// SP+20: LR
|
||||
// SP+24: video segment (later: wrap width in X direction)
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#24] // load video segment -> R4
|
||||
|
||||
// get wrap width -> [SP+24]
|
||||
ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width
|
||||
movs r6,#3 // mask to align to 32-bit
|
||||
bics r7,r6 // align wrap
|
||||
str r7,[sp,#24] // save wrap width (overwrites the caller's stacked segm argument slot)
|
||||
|
||||
// align X coordinate to 32-bit -> R1
|
||||
bics r1,r6
|
||||
|
||||
// align remaining width -> [SP+0]
|
||||
bics r3,r6
|
||||
str r3,[sp,#0] // save new width
|
||||
|
||||
// base pointer to image data (without X) -> LR, R2
|
||||
ldrh r5,[r4,#SSEGM_WB] // get pitch of rows
|
||||
muls r2,r5 // Y * WB -> offset of row in image buffer
|
||||
ldr r5,[r4,#SSEGM_DATA] // pointer to data
|
||||
add r2,r5 // base address of image buffer
|
||||
mov lr,r2 // save pointer to image buffer
|
||||
|
||||
// prepare pointer to image data with X -> R2
|
||||
lsrs r6,r1,#3 // convert X to 8-pixel offset
|
||||
add r2,r6 // pointer to source image buffer -> R2
|
||||
|
||||
// prepare size of one plane -> R3
|
||||
ldr r3,[r4,#SSEGM_PAR] // get size of one plane -> R3
|
||||
|
||||
// prepare pointer to palette translation table -> R7
|
||||
ldr r7,[r4,#SSEGM_PAR2] // get pointer to palette translation table -> R7
|
||||
|
||||
// ---- render 2nd half of first 8-pixel
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate
|
||||
// R2 ... pointer to source image data
|
||||
// R3 ... size of one plane (= offset of plane 1 from plane 0)
|
||||
// R4 ... (temporary)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... (temporary)
|
||||
// R7 ... *pointer to palette translation table
|
||||
// LR ... *base pointer to image data (without X)
|
||||
// [SP+0] ... *remaining width
|
||||
// [SP+24] ... *wrap width
|
||||
|
||||
// check bit 2 of X coordinate - check if image starts with 2nd half of first 8-pixel
|
||||
lsls r5,r1,#29 // check bit 2 of X coordinate (moved into the sign bit)
|
||||
bpl 2f // bit 2 not set, starting even 4-pixels
|
||||
|
||||
// [5] load samples -> R5, R6
|
||||
ldrb r5,[r2,#0] // [2] load sample from plane 1
|
||||
ldrb r6,[r2,r3] // [2] load sample from plane 2
|
||||
adds r2,#1 // [1] increase pointer
|
||||
|
||||
// [5] compose samples LOW -> R5
|
||||
lsls r6,#28 // [1] isolate low 4 bits from sample 2
|
||||
lsrs r6,#22 // [1] shift to bit position 6
|
||||
lsls r5,#28 // [1] isolate low 4 bit from sample 1
|
||||
lsrs r5,#26 // [1] shift to bit position 2
|
||||
orrs r5,r6 // [1] compose samples
|
||||
|
||||
// [4] write pixels
|
||||
ldr r5,[r7,r5] // [2] load colors
|
||||
stmia r0!,{r5} // [2] write pixels
|
||||
|
||||
// shift X coordinate
|
||||
adds r1,#4 // shift X coordinate
|
||||
|
||||
// check end of segment
|
||||
ldr r6,[sp,#24] // load wrap width
|
||||
cmp r1,r6 // X=end of segment?
|
||||
blo 1f
|
||||
movs r1,#0 // reset X coordinate
|
||||
mov r2,lr // get base pointer to image data -> R2
|
||||
|
||||
// shift remaining width
|
||||
1: ldr r6,[sp,#0] // get remaining width
|
||||
subs r6,#4 // shift width
|
||||
str r6,[sp,#0] // save new width
|
||||
|
||||
// prepare wrap width - start X -> R6
|
||||
2: ldr r6,[sp,#24] // load wrap width
|
||||
subs r6,r1 // pixels remaining to end of segment
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// Outer loop variables (* prepared before outer loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... number of 4-pixels - 1 to generate in one part of segment
|
||||
// R2 ... *pointer to source image data
|
||||
// R3 ... *size of one plane (= offset of plane 1 from plane 0)
|
||||
// R4 ... (temporary)
|
||||
// R5 ... (temporary)
|
||||
// R6 ... part width
|
||||
// R7 ... *pointer to palette translation table
|
||||
// LR ... *base pointer to image data (without X)
|
||||
// [SP+0] ... *remaining width
|
||||
// [SP+24] ... *wrap width
|
||||
|
||||
RenderPlane2_OutLoop:
|
||||
|
||||
// limit wrap width by total width -> R6
|
||||
ldr r4,[sp,#0] // get remaining width
|
||||
cmp r6,r4 // compare with wrap width
|
||||
bls 2f // width is OK
|
||||
mov r6,r4 // limit wrap width
|
||||
|
||||
// check number of pixels
|
||||
2: cmp r6,#8 // check number of remaining pixels
|
||||
bhs 5f // enough 8-pixels remain
|
||||
|
||||
// check if 1st part of last 8-pixel remains
|
||||
cmp r6,#4 // check number of pixels
|
||||
blo 3f // all done
|
||||
|
||||
// ---- render 1st part of last 8-pixel
|
||||
|
||||
RenderPlane2_Last:
|
||||
|
||||
// [5] load samples -> R5, R4
|
||||
ldrb r5,[r2,#0] // [2] load sample from plane 1
|
||||
ldrb r4,[r2,r3] // [2] load sample from plane 2
|
||||
adds r2,#1 // [1] increase pointer
|
||||
|
||||
// [5] compose samples HIGH -> R4
|
||||
lsrs r4,#4 // [1] isolate high 4 bits from sample 2
|
||||
lsls r4,#8 // [1] shift left
|
||||
orrs r4,r5 // [1] compose sample 2 with sample 1
|
||||
lsrs r4,#4 // [1] isolate high 4 bits from sample 1
|
||||
lsls r4,#2 // [1] 2 shifts to get index*4
|
||||
|
||||
// [4] write pixels
|
||||
ldr r4,[r7,r4] // [2] load colors
|
||||
stmia r0!,{r4} // [2] write pixels
|
||||
|
||||
// check if continue with next segment
|
||||
mov r2,lr // get base pointer to image data -> R2
|
||||
cmp r6,#4
|
||||
bhi RenderPlane2_OutLoop
|
||||
|
||||
// pop registers and return
|
||||
3: pop {r3-r7,pc}
|
||||
|
||||
// ---- prepare to render whole 8-pixels
|
||||
|
||||
// prepare number of 4-pixels to render -> R1
|
||||
5: lsrs r1,r6,#2 // shift to get number of 4-pixels
|
||||
lsls r6,r1,#2 // shift back to get number of pixels, rounded down -> R6
|
||||
subs r4,r6 // get remaining width
|
||||
str r4,[sp,#0] // save new remaining width
|
||||
subs r1,#1 // number of 4-pixels - 1
|
||||
|
||||
// ---- [25*N-1] start inner loop, render whole 8-pixels in one part of segment
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination data buffer
|
||||
// R1 ... *number of 4-pixels - 1 to generate (loop counter)
|
||||
// R2 ... *pointer to source image data
|
||||
// R3 ... *size of one plane (= offset of plane 1 from plane 0)
|
||||
// R4 ... output sample
|
||||
// R5 ... sample from plane 1
|
||||
// R6 ... sample from plane 2
|
||||
// R7 ... *pointer to palette translation table
|
||||
// LR ... *base pointer to image data (without X)
|
||||
// [SP+0] ... *remaining width
|
||||
// [SP+24] ... *wrap width
|
||||
|
||||
RenderPlane2_InLoop:
|
||||
|
||||
// [5] load samples -> R5, R6
|
||||
ldrb r5,[r2,#0] // [2] load sample from plane 1
|
||||
ldrb r6,[r2,r3] // [2] load sample from plane 2
|
||||
adds r2,#1 // [1] increase pointer
|
||||
|
||||
// [5] compose samples HIGH -> R4
|
||||
lsrs r4,r6,#4 // [1] isolate high 4 bits from sample 2
|
||||
lsls r4,#8 // [1] shift left
|
||||
orrs r4,r5 // [1] compose sample 2 with sample 1
|
||||
lsrs r4,#4 // [1] isolate high 4 bits from sample 1
|
||||
lsls r4,#2 // [1] 2 shifts to get index*4
|
||||
|
||||
// [2] prepare first 4 pixels
|
||||
ldr r4,[r7,r4] // [2] load colors
|
||||
|
||||
// [5] compose samples LOW -> R5
|
||||
lsls r6,#28 // [1] isolate low 4 bits from sample 2
|
||||
lsrs r6,#22 // [1] shift to bit position 6
|
||||
lsls r5,#28 // [1] isolate low 4 bit from sample 1
|
||||
lsrs r5,#26 // [1] shift to bit position 2
|
||||
orrs r5,r6 // [1] compose samples
|
||||
|
||||
// [5] write pixels
|
||||
ldr r5,[r7,r5] // [2] load colors
|
||||
stmia r0!,{r4,r5} // [3] write pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r1,#2 // [1] loop counter (two 4-pixel groups consumed per pass)
|
||||
bhi RenderPlane2_InLoop // [1,2] > 0, next step
|
||||
|
||||
// ---- end inner loop
|
||||
|
||||
RenderPlane2_EndLoop:
|
||||
|
||||
// continue to outer loop
|
||||
ldr r6,[sp,#24] // load wrap width -> R6 (LDR leaves the flags from SUBS intact)
|
||||
beq RenderPlane2_Last // counter hit exactly 0: one 4-pixel group left, render 1st half of last 8-pixels
|
||||
mov r2,lr // get base pointer to image data -> R2
|
||||
b RenderPlane2_OutLoop // go back to outer loop
|
||||
|
|
@ -1,123 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_PROGRESS
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" u32* RenderProgress(u32* cbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render horizontal progress indicator GF_PROGRESS
|
||||
// R0 ... pointer to control buffer
|
||||
// R1 ... start X coordinate (in pixels, must be multiple of 4)
|
||||
// R2 ... start Y coordinate (in graphics lines)
|
||||
// R3 ... width to display (must be multiple of 4 and > 0)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to control buffer.
|
||||
// 320 pixels takes 0.5 us on 151 MHz.
|
||||
|
||||
.thumb_func
|
||||
.global RenderProgress
|
||||
RenderProgress:
// Builds control-buffer entries for one line of a horizontal progress bar.
// Each entry is two words written through R0: a width and a source address.
// For every wrap-limited part of the requested width, the span with
// X/4 < data sample is taken from gradient buffer 1 (SSEGM_PAR) and the rest
// from gradient buffer 2 (SSEGM_PAR2). The data sample is the byte at
// SSEGM_DATA[y]; it is compared against X/4, so all widths here are in
// units of 4 pixels. R0 is left pointing past the last entry written.
|
||||
|
||||
// push registers
|
||||
push {r4-r7,lr}
|
||||
|
||||
// Stack content:
|
||||
// SP+0: R4
|
||||
// SP+4: R5
|
||||
// SP+8: R6
|
||||
// SP+12: R7
|
||||
// SP+16: LR
|
||||
// SP+20: video segment
|
||||
|
||||
// Variables:
|
||||
// R0 ... pointer to control buffer
|
||||
// R1 ... X coordinate/4
|
||||
// R2 ... data sample
|
||||
// R3 ... remaining width/4
|
||||
// R4 ... gradient buffer 1
|
||||
// R5 ... gradient buffer 2
|
||||
// R6 ... (temporary)
|
||||
// R7 ... current wrap width/4
|
||||
// LR ... wrap width/4
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#20] // load video segment -> R4
|
||||
|
||||
// prepare X coordinate/4 -> R1
|
||||
lsrs r1,#2 // X coordinate/4 -> R1
|
||||
|
||||
// load data sample -> R2
|
||||
ldr r5,[r4,#SSEGM_DATA] // pointer to data
|
||||
ldrb r2,[r5,r2] // load data sample (byte indexed by Y) -> R2
|
||||
|
||||
// prepare remaining width/4 -> R3
|
||||
lsrs r3,#2 // width/4 -> R3
|
||||
|
||||
// get wrap width/4 -> LR
|
||||
ldrh r7,[r4,#SSEGM_WRAPX] // get wrap width
|
||||
lsrs r7,#2 // wrap width/4 -> R7
|
||||
mov lr,r7
|
||||
|
||||
// prepare gradient buffers -> R4, R5
|
||||
ldr r5,[r4,#SSEGM_PAR2] // gradient buffer 2 -> R5
|
||||
ldr r4,[r4,#SSEGM_PAR] // gradient buffer 1 -> R4 (last use of the segment pointer)
|
||||
|
||||
// check remaining width
|
||||
2: tst r3,r3 // check remaining width
|
||||
beq 9f // end of data
|
||||
|
||||
// prepare wrap width - start X -> R7
|
||||
mov r7,lr // wrap width
|
||||
subs r7,r1 // pixels remaining to end of segment
|
||||
|
||||
// limit wrap width by total width -> R7
|
||||
cmp r7,r3 // compare with wrap width
|
||||
bls 4f // width is OK
|
||||
mov r7,r3 // limit wrap width
|
||||
|
||||
// decrease remaining width
|
||||
4: subs r3,r7 // subtract from remaining width
|
||||
|
||||
// first part visible if x < data
|
||||
cmp r1,r2
|
||||
bhs 6f // x >= data
|
||||
|
||||
// width of this part
|
||||
subs r6,r2,r1 // width <- data - x
|
||||
|
||||
// limit width
|
||||
cmp r6,r7 // check width
|
||||
bls 5f // width is OK
|
||||
mov r6,r7 // limit width
|
||||
5: subs r7,r6 // decrease width
|
||||
|
||||
// save control block with 1st part
|
||||
stm r0!,{r6} // write width
|
||||
adds r6,r4,r1 // gradient address at offset x
|
||||
stm r0!,{r6} // write address
|
||||
mov r1,r2 // X <- data
|
||||
|
||||
// check if some width remain
|
||||
6: tst r7,r7 // check width of this part
|
||||
beq 7f // end of segment
|
||||
|
||||
// save control block with 2nd part
|
||||
stm r0!,{r7} // write width
|
||||
adds r6,r5,r1 // gradient address at offset x
|
||||
stm r0!,{r6} // write address
|
||||
|
||||
// reset X
|
||||
7: movs r1,#0 // reset X
|
||||
b 2b // next segment
|
||||
|
||||
// pop registers and return
|
||||
9: pop {r4-r7,pc}
|
||||
|
|
@ -1,164 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render LAYERMODE_SPRITE*
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" void RenderSprite(u8* dbuf, int y, sLayer* scr)
|
||||
|
||||
// Render one scanline of a sprite layer (LAYERMODE_SPRITE*).
// The layer holds a list of sprite pointers (SLAYER_IMG) and a sprite count
// (SLAYER_SPRITENUM). A sprite is skipped unless scanline y lies inside
// [s->y, s->y + s->h). Otherwise its per-line segment (start s->x0[y2],
// width s->w0[y2]) is clipped to the range 0..W (SLAYER_W) and handed to
// BlitKey (defined elsewhere) together with the sprite's key color.
// Input:
|
||||
// R0 ... dbuf pointer to data buffer
|
||||
// R1 ... y coordinate of scanline
|
||||
// R2 ... scr pointer to layer screen structure sLayer
|
||||
|
||||
.thumb_func
|
||||
.global RenderSprite
|
||||
RenderSprite:
|
||||
|
||||
// push registers
|
||||
push {r4-r7,lr}
|
||||
|
||||
// Stack content and input variables:
|
||||
// R0 dbuf pointer to data buffer
|
||||
// R1 Y coordinate of scanline
|
||||
// R2 scr pointer to layer screen structure sLayer, later: num number of sprites
|
||||
// R3
|
||||
// SP+0: R4
|
||||
// SP+4: R5
|
||||
// SP+8: R6
|
||||
// SP+12: R7
|
||||
// SP+16: LR
|
||||
|
||||
// Variables:
|
||||
// R0 ... dbuf pointer to data buffer, later: dbuf[x] destination address
|
||||
// R1 ... Y coordinate of scanline, later: Y2 coordinate relative to sprite base, later: s->img[Y2*WB+X2] address of sprite line
|
||||
// R2 ... num number of sprites (loop counter), later: W2 width of sprite segment
|
||||
// R3 ... s pointer to current sprite, later: col key color
|
||||
// R4 ... (temporary), later: absolute X coordinate of start of line
|
||||
// R5 ... relative X2 coordinate of sprite segment
|
||||
// R6 ... W layer screen width
|
||||
// R7 ... spr pointer to list of sprites
|
||||
// LR
|
||||
|
||||
// load pointer to list of sprites -> R7
|
||||
ldr r7,[r2,#SLAYER_IMG]
|
||||
|
||||
// load screen width -> R6
|
||||
ldrh r6,[r2,#SLAYER_W]
|
||||
|
||||
// load number of sprites -> R2
|
||||
ldrh r2,[r2,#SLAYER_SPRITENUM]
|
||||
|
||||
// count number of sprites, end if num = 0
|
||||
2: subs r2,#1 // decrement number of sprites
|
||||
blo 9f // borrow (counter was already 0): no other sprites
|
||||
|
||||
// R0 ... dbuf pointer to data buffer
|
||||
// R1 ... Y coordinate of scanline
|
||||
// R2 ... num number of sprites (loop counter)
|
||||
// R3 ...
|
||||
// R4 ...
|
||||
// R5 ...
|
||||
// R6 ... W layer screen width
|
||||
// R7 ... spr pointer to list of sprites
|
||||
|
||||
// push registers (R0..R2 are reused as BlitKey arguments and restored at label 8)
|
||||
push {r0-r2} // push registers R0..R2
|
||||
|
||||
// get pointer to next sprite -> R3
|
||||
ldmia r7!,{r3} // pointer to sprite -> R3, advance list pointer R7
|
||||
// R3 ... s pointer to current sprite
|
||||
|
||||
// prepare Y2 coordinate relative to sprite base -> R1
|
||||
ldrh r4,[r3,#SSPRITE_Y] // get Y coordinate of the sprite -> R4
|
||||
sxth r4,r4 // sign-extend sprite Y (16-bit signed)
|
||||
subs r1,r1,r4 // relative coordinate Y2 = Y - s->y
|
||||
// R1 ... Y2 coordinate relative to sprite base
|
||||
|
||||
// check if Y2 coordinate is valid
|
||||
bmi 8f // Y2 < 0, go next sprite
|
||||
ldrh r4,[r3,#SSPRITE_H] // get sprite height
|
||||
cmp r1,r4 // check sprite height
|
||||
bge 8f // Y2 >= s->h, go next sprite
|
||||
|
||||
// prepare relative start X2 coordinate of this line segment -> R5
|
||||
ldr r4,[r3,#SSPRITE_X0] // get table of X0 of lines
|
||||
ldrb r5,[r4,r1] // get X2 coordinate s->x0[y2] -> R5 (one byte per line)
|
||||
// lsls r5,#2 // convert X2 coordinate to byte offset
|
||||
// R5 ... relative X2 coordinate of sprite segment
|
||||
|
||||
// get width W2 of this line segment -> R2
|
||||
ldr r4,[r3,#SSPRITE_W0] // get table of W0 of lines
|
||||
ldrb r2,[r4,r1] // get W2 width s->w0[y2] -> R2 (one byte per line)
|
||||
// lsls r2,#2 // convert W2 width to bytes
|
||||
// R2 ... W2 width of sprite segment
|
||||
|
||||
// get address of sprite line s->img[Y2*s->wb] -> R1
|
||||
ldrh r4,[r3,#SSPRITE_WB] // get sprite pitch s->wb
|
||||
muls r1,r1,r4 // sprite offset Y2*s->wb
|
||||
ldr r4,[r3,#SSPRITE_IMG] // get sprite image
|
||||
add r1,r4 // line address -> R1
|
||||
// R1 ... s->img[Y2*WB] address of sprite line
|
||||
|
||||
// get absolute X coordinate of start of line -> R4
|
||||
ldrh r4,[r3,#SSPRITE_X] // get sprite X coordinate -> R4
|
||||
sxth r4,r4 // sign-extend sprite X (16-bit signed)
|
||||
// R4 ... absolute X coordinate of start of line
|
||||
|
||||
// get key color -> R3
|
||||
ldrb r3,[r3,#SSPRITE_KEYCOL] // get key color -> R3 (sprite pointer is no longer needed)
|
||||
// R3 ... col key color
|
||||
|
||||
// check if X coordinate >= 0
|
||||
adds r4,r4,r5 // s->X + X2, X coordinate of start of line -> R4
|
||||
bpl 3f // X >= 0, sprite does not lie below start
|
||||
|
||||
// sprite correction: clip the part left of X = 0 (X is negative here)
|
||||
subs r5,r4 // X2 -= X (skip -X source pixels)
|
||||
adds r2,r4 // W2 += X (shorten segment by -X)
|
||||
movs r4,#0 // X = 0
|
||||
|
||||
// shift source address -> R1
|
||||
3: adds r1,r5 // add X2
|
||||
// R1 ... s->img[Y2*WB+X2] address of sprite line
|
||||
// R5 ...
|
||||
|
||||
// check line length W2 (clip against right edge of the layer)
|
||||
subs r5,r6,r4 // W - X -> R5
|
||||
cmp r2,r5 // compare W2 with W - X
|
||||
ble 4f // W2 <= W - X, length is OK
|
||||
mov r2,r5 // limit segment width W2 -> R2
|
||||
|
||||
// check width W2
// NOTE(review): tst updates N and Z but not V, so the signed "ble" below
// also reads the V flag left by "cmp r2,r5" above - confirm that compare
// cannot overflow for the value ranges used.
|
||||
4: tst r2,r2 // check W2
|
||||
ble 8f // no W2 left (W2 <= 0)
|
||||
|
||||
// shift destination address
|
||||
adds r0,r4
|
||||
|
||||
// R0 ... dbuf pointer to data buffer
|
||||
// R1 ... s->img[Y2*WB+X2] address of sprite line
|
||||
// R2 ... W2 width of sprite segment
|
||||
// R3 ... col key color
|
||||
// R4 ...
|
||||
// R5 ...
|
||||
// R6 ... W layer screen width
|
||||
// R7 ... spr pointer to list of sprites
|
||||
|
||||
// blit sprite line (R6 and R7 are still needed after the call)
|
||||
bl BlitKey // blit sprite line
|
||||
|
||||
// pop registers and continue loop
|
||||
8: pop {r0-r2} // pop registers R0..R2
|
||||
b 2b // continue loop
|
||||
|
||||
// pop registers and return
|
||||
9: pop {r4-r7,pc}
|
||||
|
|
@ -1,431 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_TILE
|
||||
//
|
||||
// ****************************************************************************
|
||||
// u16 par3; // SSEGM_PAR3 tile width (must be multiple of 4)
|
||||
// u32 par; // SSEGM_PAR tile table with one column of tiles
|
||||
// u32 par2; // SSEGM_PAR2 tile height
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" u32* RenderTile(u32* cbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render tiles GF_TILE
// Builds the control-buffer entries for one scanline of a tile-map segment.
// Every entry written to cbuf is two words: a width counted in 4-pixel units,
// followed by the address of the source pixels inside a tile image.
// Map cells are single bytes (tile indices) read from the segment data
// (SSEGM_DATA, row pitch SSEGM_WB). The pixel address of a tile line is
//   tile table (SSEGM_PAR) + index * (tile width * tile height)
//   + (y % tile height) * tile width  [+ x % tile width for a partial first tile]
// X restarts at 0 when it reaches the wrap width (SSEGM_WRAPX aligned down to 4).
// Both divisions (y / tile height, x / tile width) run on the SIO hardware
// divider; independent work is scheduled between writing the operands and
// reading the results.
|
||||
// cbuf ... destination control buffer
|
||||
// x ... start X coordinate (must be multiple of 4)
|
||||
// y ... start Y coordinate
|
||||
// w ... width of this segment (must be multiple of 4)
|
||||
// segm ... video segment
|
||||
// Output new cbuf pointer.
|
||||
// 320 pixels takes on 151 MHz: tiles 8x8 3.5 us, tile 16x16 2 us, tiles 32x32 1.3 us, tiles 64x64 0.9 us.
|
||||
|
||||
.thumb_func
|
||||
.global RenderTile
|
||||
RenderTile:
|
||||
|
||||
// push registers
// (the slots of R1..R3 at SP+0..SP+8 are reused below as local variables)
|
||||
push {r1-r7,lr}
|
||||
|
||||
// Input registers and stack content:
|
||||
// R0 ... destination control buffer
|
||||
// SP+0: R1 ... X coordinate
|
||||
// SP+4: R2 ... Y coordinate
|
||||
// SP+8: R3 ... width to display
|
||||
// SP+12: R4
|
||||
// SP+16: R5
|
||||
// SP+20: R6
|
||||
// SP+24: R7
|
||||
// SP+28: LR
|
||||
// SP+32: video segment
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#32] // load video segment -> R4
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R2 ... Y coordinate
|
||||
// R3 ... remaining width
|
||||
// R4 ... sSegm*
|
||||
|
||||
// start divide Y/tile_height
|
||||
ldr r5,RenderTile_pSioBase // get address of SIO base -> R5
|
||||
str r2,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate
|
||||
ldr r2,[r4,#SSEGM_PAR2] // tile height -> R2
|
||||
str r2,[r5,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, tile height
|
||||
|
||||
// - now we must wait at least 8 clock cycles to get result of division
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R2 ... tile height
|
||||
// R3 ... remaining width
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
|
||||
// [6] get wrap width -> [SP+0]
|
||||
ldrh r7,[r4,#SSEGM_WRAPX] // [2] get wrap width
|
||||
movs r6,#3 // [1] mask to align to 32-bit
|
||||
bics r7,r6 // [1] align wrap
|
||||
str r7,[sp,#0] // [2] save wrap width
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R2 ... tile height
|
||||
// R3 ... remaining width
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
// R6 ... align mask #3
|
||||
// [SP+0] ... wrap width
|
||||
|
||||
// [1] align X coordinate to 32-bit -> R1
|
||||
bics r1,r6 // [1] align X
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R2 ... tile height
|
||||
// R3 ... remaining width
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
// R6 ... align mask #3
|
||||
// [SP+0] ... wrap width
|
||||
|
||||
// [3] align remaining width -> [SP+4]
|
||||
bics r3,r6 // [1] align width
|
||||
str r3,[sp,#4] // [2] store aligned width to [SP+4]
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R2 ... tile height
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
|
||||
// [4] prepare tile width -> [SP+8], R3
|
||||
ldrh r3,[r4,#SSEGM_PAR3] // [2] get tile width -> R3
|
||||
str r3,[sp,#8] // [2] save tile width -> [SP+8]
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R2 ... tile height
|
||||
// R3 ... tile width
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
// [SP+8] ... tile width
|
||||
|
||||
// load result of division Y/tile_height -> R6 Y relative at row, R7 Y row
|
||||
// Note: QUOTIENT must be read last
|
||||
ldr r6,[r5,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R6, Y coordinate relative to current row
|
||||
ldr r7,[r5,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R7, index of row
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R2 ... tile height
|
||||
// R3 ... tile width
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
// R6 ... Y relative at row
|
||||
// R7 ... Y row index
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
// [SP+8] ... tile width
|
||||
|
||||
// start divide X/tile_width
|
||||
str r1,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, X coordinate
|
||||
str r3,[r5,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, tile width
|
||||
|
||||
// - now we must wait at least 8 clock cycles to get result of division
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R2 ... tile height
|
||||
// R3 ... tile width
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
// R6 ... Y relative at row
|
||||
// R7 ... Y row index
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
// [SP+8] ... tile width
|
||||
|
||||
// [1] prepare tile size -> R2
|
||||
muls r2,r3 // [1] tile height*width -> size R2
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R2 ... tile size
|
||||
// R3
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
// R6 ... Y relative at row
|
||||
// R7 ... Y row index
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
// [SP+8] ... tile width
|
||||
|
||||
// [7] base pointer to source data buffer (without X) -> LR, R7
|
||||
ldrh r3,[r4,#SSEGM_WB] // [2] get pitch of rows -> R3
|
||||
muls r7,r3 // [1] pitch * row (Y * WB) -> offset of row in data buffer
|
||||
ldr r3,[r4,#SSEGM_DATA] // [2] pointer to data -> R3
|
||||
adds r7,r3 // [1] base address of data buffer
|
||||
mov lr,r7 // [1] save base address (LR is used as a spare register; the return address is on the stack)
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R2 ... tile size
|
||||
// R3
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
// R6 ... Y relative at row
|
||||
// R7 ... base address of data buffer (without X)
|
||||
// LR ... base address of data buffer (without X)
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
// [SP+8] ... tile width
|
||||
|
||||
// [6] tile base address -> R4
|
||||
ldr r3,[sp,#8] // [2] tile width
|
||||
muls r6,r3 // [1] tile width * Y relative to row -> tile line offset R6
|
||||
ldr r4,[r4,#SSEGM_PAR] // [2] pointer to tiles
|
||||
adds r4,r6 // [1] tile base address -> R4
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R2 ... tile size
|
||||
// R3 ... tile width
|
||||
// R4 ... tile base address
|
||||
// R5 ... SIO_BASE
|
||||
// R6
|
||||
// R7 ... base address of data buffer (without X)
|
||||
// LR ... base address of data buffer (without X)
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
// [SP+8] ... tile width
|
||||
|
||||
// load result of division X/tile_width -> R6 X pixel relative, R5 tile position
|
||||
// Note: QUOTIENT must be read last
|
||||
ldr r6,[r5,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R6, X pixel relative in tile
|
||||
ldr r5,[r5,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R5, tile position
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R2 ... tile size
|
||||
// R3 ... tile width
|
||||
// R4 ... tile base address
|
||||
// R5 ... tile position
|
||||
// R6 ... X pixel relative in tile
|
||||
// R7 ... base address of data buffer (without X)
|
||||
// LR ... base address of data buffer (without X)
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
// [SP+8] ... tile width
|
||||
|
||||
// prepare current pointer to source data buffer with X -> R7
|
||||
adds r7,r5 // tile source address -> R7
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R2 ... tile size
|
||||
// R3 ... tile width
|
||||
// R4 ... tile base address
|
||||
// R5
|
||||
// R6 ... X pixel relative in tile
|
||||
// R7 ... pointer to source data buffer (with X)
|
||||
// LR ... base address of data buffer (without X)
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
// [SP+8] ... tile width
|
||||
|
||||
// ---- render rest of first tile
|
||||
|
||||
// check if X is tile-aligned
|
||||
tst r6,r6 // check tile align
|
||||
beq 2f // X is tile aligned
|
||||
|
||||
// shift X coordinate
|
||||
subs r5,r3,r6 // pixels remain in current tile -> R5
|
||||
adds r1,r5 // shift X coordinate (align to next tile)
|
||||
|
||||
// shift remaining width
|
||||
ldr r3,[sp,#4] // get remaining width
|
||||
subs r3,r5 // shift width
|
||||
str r3,[sp,#4] // store remaining width
|
||||
|
||||
// write number of 4-pixels
|
||||
lsrs r5,#2 // number of 4-pixels
|
||||
stmia r0!,{r5} // save width
|
||||
|
||||
// load tile index -> R3
|
||||
ldrb r3,[r7,#0] // [2] load tile index
|
||||
adds r7,#1 // [1] increase tile address
|
||||
|
||||
// write tile address
|
||||
muls r3,r2 // tile index * tile size = tile offset
|
||||
add r3,r4 // [1] add tile base address
|
||||
add r3,r6 // [1] shift to tile start
|
||||
stmia r0!,{r3} // [3] save pointer
|
||||
|
||||
// check end of segment
|
||||
ldr r3,[sp,#0] // get wrap width
|
||||
cmp r1,r3 // check end of segment
|
||||
blo 2f // not end of segment
|
||||
movs r1,#0 // reset X coordinate
|
||||
mov r7,lr // get base pointer to tile data
|
||||
|
||||
// prepare wrap width - start X -> R5
|
||||
2: ldr r3,[sp,#0] // get wrap width
|
||||
subs r5,r3,r1 // pixels remaining to end of segment
|
||||
ldr r3,[sp,#4] // total remaining width -> R3
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// Outer loop variables (* prepared before outer loop):
|
||||
// R0 ... *pointer to destination control buffer
|
||||
// R1 ...
|
||||
// R2 ... *tile size
|
||||
// R3 ... *total remaining width
|
||||
// R4 ... *tile base address
|
||||
// R5 ... *wrap width of this segment
|
||||
// R6 ...
|
||||
// R7 ... *pointer to source data buffer
|
||||
// LR ... *base address of data buffer (without X)
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
// [SP+8] ... tile width
|
||||
|
||||
RenderTile_OutLoop:
|
||||
|
||||
// limit wrap width by total width -> R5
|
||||
cmp r5,r3 // compare wrap width with total width
|
||||
bls 2f // width is OK
|
||||
mov r5,r3 // limit wrap width
|
||||
|
||||
// check if remain whole tile
|
||||
2: ldr r1,[sp,#8] // get tile width -> R1
|
||||
cmp r5,r1 // check number of remaining pixels
|
||||
bhs 5f // remain whole tiles
|
||||
|
||||
// check if start of last tile remains
|
||||
cmp r5,#4 // check start of last tile
|
||||
blo 3f // all done
|
||||
mov r1,r5 // width to render
|
||||
|
||||
// ---- render start of last tile
|
||||
// R0 ... *pointer to destination control buffer
|
||||
// R1 ... *width to render in this segment
|
||||
// R2 ... *tile size
|
||||
// R3 ... *total remaining width
|
||||
// R4 ... *tile base address
|
||||
// R5 ... *wrap width of this segment
|
||||
// R6 ...
|
||||
// R7 ... *pointer to source data buffer (with X)
|
||||
// LR ... *base address of data buffer (without X)
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
// [SP+8] ... tile width
|
||||
|
||||
RenderTile_Last:
|
||||
|
||||
// save width
|
||||
lsrs r6,r1,#2 // number of 4-pixels
|
||||
stmia r0!,{r6} // save width
|
||||
|
||||
// load tile index -> R6
|
||||
ldrb r6,[r7,#0] // [2] load tile index
|
||||
adds r7,#1 // [1] increase tile index
|
||||
|
||||
// save tile address
|
||||
muls r6,r2 // multiply tile index * tile size
|
||||
add r6,r4 // [1] add tile base address
|
||||
stmia r0!,{r6} // [3] save pointer
|
||||
|
||||
// check if continue with next segment
|
||||
mov r7,lr // get base pointer to tile data
|
||||
ldr r6,[sp,#8] // get tile width -> R6
|
||||
cmp r5,r6 // whole tile remains?
|
||||
bhs RenderTile_OutLoop // render next segment
|
||||
|
||||
// pop registers and return
// (R1..R3 receive the scratch slots SP+0..SP+8; R0 is not popped and holds the new cbuf pointer)
|
||||
3: pop {r1-r7,pc}
|
||||
|
||||
// ---- prepare to render whole tiles
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1
|
||||
// R2 ... tile size
|
||||
// R3 ... total remaining width
|
||||
// R4 ... tile base address
|
||||
// R5 ... width of this segment
|
||||
// R6
|
||||
// R7 ... pointer to source data buffer (with X)
|
||||
// LR ... base address of data buffer (without X)
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
// [SP+8] ... tile width
|
||||
|
||||
// prepare number of 4-pixels to render -> R1
|
||||
5: lsrs r1,r5,#2 // width of this segment in 4-pixel units -> R1
|
||||
lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5
|
||||
subs r3,r5 // update remaining width -> R3
|
||||
|
||||
ldr r5,[sp,#8] // get tile width -> R5
|
||||
lsrs r5,#2 // tile width/4 -> R5
|
||||
subs r1,r5 // number of 4-pixels - width/4
|
||||
adds r1,#1 // number of 4-pixels - (width/4-1)
|
||||
|
||||
// ---- [11*N-1] start inner loop, render in one part of segment
|
||||
// Inner loop variables (* prepared before inner loop):
|
||||
// R0 ... *pointer to destination control buffer
|
||||
// R1 ... *number of 4-pixels to generate - 1 (loop counter)
|
||||
// R2 ... *tile size
|
||||
// R3 ... *total remaining width
|
||||
// R4 ... *tile base address
|
||||
// R5 ... *tile width/4
|
||||
// R6 ... (temporary)
|
||||
// R7 ... *pointer to source data buffer (with X)
|
||||
// LR ... base address of data buffer (without X)
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
// [SP+8] ... tile width
|
||||
|
||||
RenderTile_InLoop:
|
||||
|
||||
// [3] load tile index -> R6
|
||||
ldrb r6,[r7,#0] // [2] load tile index
|
||||
adds r7,#1 // [1] increase tile index
|
||||
|
||||
// [2] get tile address
|
||||
muls r6,r2 // [1] multiply tile index * tile size
|
||||
add r6,r4 // [1] add tile base address
|
||||
|
||||
// [3] save control block
|
||||
stmia r0!,{r5,r6} // [3] save width and pointer
|
||||
|
||||
// [2,3] loop
|
||||
subs r1,r5 // [1] shift loop counter, subtract tile width/4
|
||||
bhi RenderTile_InLoop // [1,2] > 0, render next whole tile
|
||||
|
||||
// ---- end inner loop, continue with last tile, or start new part
|
||||
|
||||
// continue to outer loop
// (the two instructions below undo the "-(width/4-1)" bias of the loop counter,
//  leaving the number of 4-pixel units not covered by whole tiles)
|
||||
adds r1,r5 // return size of last tile
|
||||
subs r1,#1 // together with the add above: add "tile width/4 - 1"
|
||||
ldr r5,[sp,#0] // load wrap width -> R5
|
||||
lsls r1,#2 // convert back to pixels
|
||||
bne RenderTile_Last // render 1st half of last tile
|
||||
mov r7,lr // get base pointer to tile data -> R7
|
||||
b RenderTile_OutLoop // go back to outer loop
|
||||
|
||||
.align 2
|
||||
// pointer to SIO base
|
||||
RenderTile_pSioBase:
|
||||
.word SIO_BASE // address of SIO base
|
||||
|
|
@ -1,376 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_TILE2
|
||||
//
|
||||
// ****************************************************************************
|
||||
// u16 par3; // SSEGM_PAR3 tile width (must be multiple of 4)
|
||||
// u32 par; // SSEGM_PAR tile table with one column of tiles
|
||||
// u32 par2; // SSEGM_PAR2 LOW tile height, HIGH tile width bytes
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" u32* RenderTile2(u32* cbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render tiles GF_TILE2
|
||||
// cbuf ... destination control buffer
|
||||
// x ... start X coordinate (must be multiple of 4)
|
||||
// y ... start Y coordinate
|
||||
// w ... width of this segment (must be multiple of 4)
|
||||
// segm ... video segment
|
||||
// Output new cbuf pointer.
|
||||
// 320 pixels takes on 151 MHz: tiles 8x8 3.5 us, tile 16x16 2 us, tiles 32x32 1.3 us, tiles 64x64 0.9 us.
|
||||
|
||||
.thumb_func
|
||||
.global RenderTile2
|
||||
RenderTile2:
|
||||
|
||||
// push registers
|
||||
push {r2-r7,lr}
|
||||
|
||||
// Input registers and stack content:
|
||||
// R0 ... destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// SP+0: R2 ... Y coordinate
|
||||
// SP+4: R3 ... width to display
|
||||
// SP+8: R4
|
||||
// SP+12: R5
|
||||
// SP+16: R6
|
||||
// SP+20: R7
|
||||
// SP+24: LR
|
||||
// SP+28: video segment
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#28] // load video segment -> R4
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R2 ... Y coordinate
|
||||
// R3 ... remaining width
|
||||
// R4 ... sSegm*
|
||||
|
||||
// start divide Y/tile_height
|
||||
ldr r5,RenderTile_pSioBase // get address of SIO base -> R5
|
||||
str r2,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, Y coordinate
|
||||
ldrh r2,[r4,#SSEGM_PAR2] // tile height -> R2
|
||||
str r2,[r5,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, tile height
|
||||
|
||||
// - now we must wait at least 8 clock cycles to get result of division
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R3 ... remaining width
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
|
||||
// [6] get wrap width -> [SP+0]
|
||||
ldrh r7,[r4,#SSEGM_WRAPX] // [2] get wrap width
|
||||
movs r6,#3 // [1] mask to align to 32-bit
|
||||
bics r7,r6 // [1] align wrap
|
||||
str r7,[sp,#0] // [2] save wrap width
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R3 ... remaining width
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
// R6 ... align mask #3
|
||||
// [SP+0] ... wrap width
|
||||
|
||||
// [1] align X coordinate to 32-bit -> R1
|
||||
bics r1,r6 // [1] align X
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R3 ... remaining width
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
// R6 ... align mask #3
|
||||
// [SP+0] ... wrap width
|
||||
|
||||
// [3] align remaining width -> [SP+4]
|
||||
bics r3,r6 // [1] align width
|
||||
str r3,[sp,#4] // [2] store aligned width to [SP+4]
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
|
||||
// [2] prepare tile width -> R3
|
||||
ldrh r3,[r4,#SSEGM_PAR3] // [2] get tile width -> R3
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R3 ... tile width
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
|
||||
// load result of division Y/tile_height -> R6 Y relative at row, R7 Y row
|
||||
// Note: QUOTIENT must be read last
|
||||
ldr r6,[r5,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R6, Y coordinate relative to current row
|
||||
ldr r7,[r5,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R7, index of row
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R3 ... tile width
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
// R6 ... Y relative at row
|
||||
// R7 ... Y row index
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
|
||||
// start divide X/tile_width
|
||||
str r1,[r5,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, X coordinate
|
||||
str r3,[r5,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, tile width
|
||||
|
||||
// - now we must wait at least 8 clock cycles to get result of division
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R3 ... tile width
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
// R6 ... Y relative at row
|
||||
// R7 ... Y row index
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
|
||||
// [7] base pointer to source data buffer (without X) -> LR, R7
|
||||
ldrh r2,[r4,#SSEGM_WB] // [2] get pitch of rows -> R2
|
||||
muls r7,r2 // [1] pitch * row (Y * WB) -> offset of row in data buffer
|
||||
ldr r2,[r4,#SSEGM_DATA] // [2] pointer to data -> R2
|
||||
adds r7,r2 // [1] base address of data buffer
|
||||
mov lr,r7 // [1] save base address
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R3 ... tile width
|
||||
// R4 ... sSegm*
|
||||
// R5 ... SIO_BASE
|
||||
// R6 ... Y relative at row
|
||||
// R7 ... base address of data buffer (without X)
|
||||
// LR ... base address of data buffer (without X)
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
|
||||
// [6] tile base address -> R4
|
||||
ldrh r2,[r4,#SSEGM_PAR2+2] // [2] tile width bytes -> R2
|
||||
muls r6,r2 // [1] tile width bytes * Y relative to row -> tile line offset R6
|
||||
ldr r4,[r4,#SSEGM_PAR] // [2] pointer to tiles
|
||||
adds r4,r6 // [1] tile base address -> R4
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R3 ... tile width
|
||||
// R4 ... tile base address
|
||||
// R5 ... SIO_BASE
|
||||
// R7 ... base address of data buffer (without X)
|
||||
// LR ... base address of data buffer (without X)
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
|
||||
// load result of division X/tile_width -> R6 X pixel relative, R5 tile position
|
||||
// Note: QUOTIENT must be read last
|
||||
ldr r6,[r5,#SIO_DIV_REMAINDER_OFFSET] // get remainder of result -> R6, X pixel relative in tile
|
||||
ldr r5,[r5,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R5, tile position
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R3 ... tile width
|
||||
// R4 ... tile base address
|
||||
// R5 ... tile position
|
||||
// R6 ... X pixel relative in tile
|
||||
// R7 ... base address of data buffer (without X)
|
||||
// LR ... base address of data buffer (without X)
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
|
||||
// prepare current pointer to source data buffer with X -> R7
|
||||
adds r7,r5 // tile source address -> R7
|
||||
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... X coordinate
|
||||
// R3 ... tile width
|
||||
// R4 ... tile base address
|
||||
// R6 ... X pixel relative in tile
|
||||
// R7 ... pointer to source data buffer (with X)
|
||||
// LR ... base address of data buffer (without X)
|
||||
// [SP+0] ... wrap width
|
||||
// [SP+4] ... remaining width
|
||||
|
||||
// ---- render rest of first tile
|
||||
|
||||
// check if X is tile-aligned
|
||||
tst r6,r6 // check tile align
|
||||
beq 2f // X is tile aligned
|
||||
|
||||
// shift X coordinate
|
||||
subs r5,r3,r6 // pixels remain in current tile -> R5
|
||||
adds r1,r5 // shift X coordinate (align to next tile)
|
||||
|
||||
// shift remaining width
|
||||
ldr r2,[sp,#4] // get remaining width
|
||||
subs r2,r5 // shift width
|
||||
str r2,[sp,#4] // store remaining width
|
||||
|
||||
// write number of 4-pixels
|
||||
lsrs r5,#2 // number of 4-pixels
|
||||
stmia r0!,{r5} // save width
|
||||
|
||||
// load tile index -> R2
|
||||
ldrb r2,[r7,#0] // [2] load tile index
|
||||
adds r7,#1 // [1] increase tile address
|
||||
|
||||
// write tile address
|
||||
muls r2,r3 // tile index * tile width = tile offset
|
||||
add r2,r4 // [1] add tile base address
|
||||
add r2,r6 // [1] shift to tile start
|
||||
stmia r0!,{r2} // [3] save pointer
|
||||
|
||||
// check end of segment
|
||||
ldr r2,[sp,#0] // get wrap width
|
||||
cmp r1,r2 // check end of segment
|
||||
blo 2f // not end of segment
|
||||
movs r1,#0 // reset X coordinate
|
||||
mov r7,lr // get base pointer to tile data
|
||||
|
||||
// prepare wrap width - start X -> R5
|
||||
2: ldr r2,[sp,#0] // get wrap width
|
||||
subs r5,r2,r1 // pixels remaining to end of segment
|
||||
2: ldr r2,[sp,#4] // total remaining width -> R2
|
||||
|
||||
// ---- start outer loop, render one part of segment
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R2 ... total remaining width
|
||||
// R3 ... tile width
|
||||
// R4 ... tile base address
|
||||
// R5 ... wrap width of this segment
|
||||
// R7 ... pointer to source data buffer
|
||||
// LR ... base address of data buffer (without X)
|
||||
// [SP+0] ... wrap width
|
||||
|
||||
RenderTile_OutLoop:
|
||||
|
||||
// limit wrap width by total width -> R5
|
||||
cmp r5,r2 // compare wrap width with total width
|
||||
bls 2f // width is OK
|
||||
mov r5,r2 // limit wrap width
|
||||
|
||||
// check if remain whole tile
|
||||
2: cmp r5,r3 // check number of remaining pixels
|
||||
bhs 5f // remain whole tiles
|
||||
|
||||
// check if start of last tile remains
|
||||
cmp r5,#4 // check start of last tile
|
||||
blo 3f // all done
|
||||
mov r1,r5 // width to render
|
||||
|
||||
// ---- render start of last tile
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... width to render in this segment
|
||||
// R2 ... total remaining width
|
||||
// R3 ... tile width
|
||||
// R4 ... tile base address
|
||||
// R5 ... wrap width of this segment
|
||||
// R7 ... pointer to source data buffer (with X)
|
||||
// LR ... base address of data buffer (without X)
|
||||
// [SP+0] ... wrap width
|
||||
|
||||
RenderTile_Last:
|
||||
|
||||
// save width
|
||||
lsrs r6,r1,#2 // number of 4-pixels
|
||||
stmia r0!,{r6} // save width
|
||||
|
||||
// load tile index -> R6
|
||||
ldrb r6,[r7,#0] // [2] load tile index
|
||||
adds r7,#1 // [1] increase tile index
|
||||
|
||||
// save tile address
|
||||
muls r6,r3 // multiply tile index * tile width
|
||||
add r6,r4 // [1] add tile base address
|
||||
stmia r0!,{r6} // [3] save pointer
|
||||
|
||||
// check if continue with next segment
|
||||
mov r7,lr // get base pointer to tile data
|
||||
cmp r5,r3 // whole tile remains?
|
||||
bhs RenderTile_OutLoop // render next segment
|
||||
|
||||
// pop registers and return
|
||||
3: pop {r2-r7,pc}
|
||||
|
||||
// ---- prepare to render whole tiles
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R2 ... total remaining width
|
||||
// R3 ... tile width
|
||||
// R4 ... tile base address
|
||||
// R5 ... width of this segment
|
||||
// R7 ... pointer to source data buffer (with X)
|
||||
// LR ... base address of data buffer (without X)
|
||||
// [SP+0] ... wrap width
|
||||
|
||||
// prepare number of 4-pixels to render -> R1
|
||||
5: lsrs r1,r5,#2 // shift to get number of tiles in multiply of 4-pixels -> R1
|
||||
lsls r5,r1,#2 // shift back to get number of pixels, rounded down -> R5
|
||||
subs r2,r5 // update remaining width -> R2
|
||||
|
||||
lsrs r5,r3,#2 // tile width/4 -> R5
|
||||
subs r1,r5 // number of 4-pixels - width/4
|
||||
adds r1,#1 // number of 4-pixels - (width/4-1)
|
||||
|
||||
// ---- [11*N-1] start inner loop, render in one part of segment
|
||||
// R0 ... pointer to destination control buffer
|
||||
// R1 ... number of 4-pixels to generate - 1 (loop counter)
|
||||
// R2 ... total remaining width
|
||||
// R3 ... tile width
|
||||
// R4 ... tile base address
|
||||
// R5 ... tile width/4
|
||||
// R7 ... pointer to source data buffer (with X)
|
||||
// LR ... base address of data buffer (without X)
|
||||
// [SP+0] ... wrap width
|
||||
|
||||
RenderTile_InLoop:
|
||||
|
||||
// [3] load tile index -> R6
|
||||
ldrb r6,[r7,#0] // [2] load tile index
|
||||
adds r7,#1 // [1] increase tile index
|
||||
|
||||
// [2] get tile addres
|
||||
muls r6,r3 // [1] multiply tile index * tile width
|
||||
add r6,r4 // [1] add tile base address
|
||||
|
||||
// [3] save control block
|
||||
stmia r0!,{r5,r6} // [3] save width and pointer
|
||||
|
||||
// [2,3] loop
|
||||
subs r1,r5 // [1] shift loop counter, subtract tile width/4
|
||||
bhi RenderTile_InLoop // [1,2] > 0, render next whole tile
|
||||
|
||||
// ---- end inner loop, continue with last tile, or start new part
|
||||
|
||||
// continue to outer loop
|
||||
adds r1,r5 // return size of last tile
|
||||
subs r1,#1 // add "tile size/4 - 1"
|
||||
ldr r5,[sp,#0] // load wrap width -> R5
|
||||
lsls r1,#2 // convert back to pixels
|
||||
bne RenderTile_Last // render start of last tile
|
||||
mov r7,lr // get base pointer to tile data -> R7
|
||||
b RenderTile_OutLoop // go back to outer loop
|
||||
|
||||
.align 2
|
||||
// pointer to SIO base
|
||||
RenderTile_pSioBase:
|
||||
.word SIO_BASE // addres of SIO base
|
||||
|
|
@ -1,450 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_TILEPERSP
|
||||
//
|
||||
// ****************************************************************************
|
||||
// data ... tile map
|
||||
// par ... column of tile images
|
||||
// par2 ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL))
|
||||
// par3 ... LOW8=number of bits of tile width and height, HIGH8=horizon offset
|
||||
// wb ... LOW8=number of bits of tile map width, HIGH8=number of bits of tile map height
|
||||
// wrapy ... segment height
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
// Byte offsets of the interpolator registers, relative to the interpolator 0 base
// address (SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET, stored at RenderTilePersp_Interp).
// Names ending in OFFSET0 are used by the code for interpolator 0.
#define ACCUM0_OFFSET0 0
|
||||
#define ACCUM1_OFFSET0 4
|
||||
#define BASE0_OFFSET0 8
|
||||
#define BASE1_OFFSET0 12
|
||||
#define BASE2_OFFSET0 16
|
||||
#define POP_LANE0_OFFSET0 20
|
||||
#define POP_LANE1_OFFSET0 24
|
||||
#define POP_FULL_OFFSET0 28
|
||||
#define PEEK_LANE0_OFFSET0 32
|
||||
#define PEEK_LANE1_OFFSET0 36
|
||||
#define PEEK_FULL_OFFSET0 40
|
||||
#define CTRL_LANE0_OFFSET0 44
|
||||
#define CTRL_LANE1_OFFSET0 48
|
||||
#define ACCUM0_ADD_OFFSET0 52
|
||||
#define ACCUM1_ADD_OFFSET0 56
|
||||
#define BASE_1AND0_OFFSET0 60
|
||||
|
||||
// Names ending in OFFSET1 are used by the code for interpolator 1
// (same register order, every offset 64 bytes higher).
#define ACCUM0_OFFSET1 64
|
||||
#define ACCUM1_OFFSET1 68
|
||||
#define BASE0_OFFSET1 72
|
||||
#define BASE1_OFFSET1 76
|
||||
#define BASE2_OFFSET1 80
|
||||
#define POP_LANE0_OFFSET1 84
|
||||
#define POP_LANE1_OFFSET1 88
|
||||
#define POP_FULL_OFFSET1 92
|
||||
#define PEEK_LANE0_OFFSET1 96
|
||||
#define PEEK_LANE1_OFFSET1 100
|
||||
#define PEEK_FULL_OFFSET1 104
|
||||
#define CTRL_LANE0_OFFSET1 108
|
||||
#define CTRL_LANE1_OFFSET1 112
|
||||
#define ACCUM0_ADD_OFFSET1 116
|
||||
#define ACCUM1_ADD_OFFSET1 120
|
||||
#define BASE_1AND0_OFFSET1 124
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" u32* RenderTilePersp(u32* cbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render tiles with perspective GF_TILEPERSP
|
||||
// using hardware interpolators interp0 and interp1 (their state is not saved during an interrupt)
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate (not used)
|
||||
// R2 ... start Y coordinate (in graphics lines)
|
||||
// R3 ... width to display (must be multiple of 4)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to data buffer.
|
||||
// 320 pixels takes ?? us on 151 MHz.
|
||||
|
||||
.thumb_func
|
||||
.global RenderTilePersp
|
||||
// Entry point of the GF_TILEPERSP renderer: writes width/4 words to the buffer at R0,
// each word holding 4 pixel bytes, and returns with R0 advanced past the written data.
RenderTilePersp:
|
||||
|
||||
// Input registers, and stack layout after the push below:
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... X coordinate (not used)
|
||||
// R2 ... Y coordinate
|
||||
// SP+0: R3 ... remaining width
|
||||
// SP+4: R4
|
||||
// SP+8: R5
|
||||
// SP+12: R6
|
||||
// SP+16: R7
|
||||
// SP+20: LR
|
||||
// SP+24: video segment
|
||||
|
||||
// push registers
|
||||
push {r3-r7,lr}
|
||||
|
||||
// ---- prepare registers
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#24] // load video segment -> R4
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R2 ... Y coordinate
|
||||
// R3 ... remaining width
|
||||
// R4 ... video segment
|
||||
|
||||
// load horizon offset -> R1, check if use perspective
|
||||
ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6
|
||||
ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5
|
||||
ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1
|
||||
sxtb r1,r1 // signed extension
|
||||
lsls r1,#2 // horizon * 4, horizon = 0 ? (Z flag tested by bne, N flag tested by bpl at label 2)
|
||||
bne 2f // use perspective
|
||||
|
||||
// not using perspective, start Y coordinate y0 = y - h/2 -> R12
|
||||
lsrs r5,#1 // segment height/2 -> R5
|
||||
subs r2,r5 // y - h/2 -> R2
|
||||
mov r12,r2 // current coordinate Y0 = y - h/2 -> R12
|
||||
|
||||
// prepare divide result to get 1<<FRACT (so the later multiplications by dist leave the matrix unscaled)
|
||||
movs r5,#1 // R5 <- 1
|
||||
str r5,[r6,#SIO_DIV_UDIVISOR_OFFSET] // divisor = 1
|
||||
lsls r5,#FRACT // constant 1<<FRACT -> R5
|
||||
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL
|
||||
b 4f
|
||||
|
||||
// using perspective, check ceiling mode (negative horizon)
|
||||
2: bpl 3f // horizon is not negative (N flag still comes from the lsls above)
|
||||
subs r2,r5,r2 // negate, y = h - y
|
||||
subs r2,#1 // y = h - 1 - y
|
||||
negs r1,r1 // absolute value of horizon
|
||||
|
||||
// prepare current coordinate Y0 = y - h -> R12
|
||||
3: subs r7,r2,r5 // y - h = current Y coordinate -> R7
|
||||
mov r12,r7 // store current coordinate Y0 -> R12
|
||||
|
||||
// start calculating distance coefficient dist = FRACTMUL*h/(y + horiz), quotient is read later in the "set matrix" section
|
||||
lsls r5,#FRACT // segment height * FRACTMUL -> R5
|
||||
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h
|
||||
adds r2,r1 // horizon + y -> R2
|
||||
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... remaining width
|
||||
// R4 ... video segment
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// prepare start coordinate X0 = -w/2 -> LR
|
||||
4: lsrs r5,r3,#1 // width/2
|
||||
negs r5,r5 // negate
|
||||
mov lr,r5 // store start coordinate X0 -> LR
|
||||
|
||||
// prepare number of 4-pixels (loop counter) -> R7
|
||||
lsrs r7,r3,#2 // width/4 -> R7
|
||||
|
||||
// prepare address of interpolator 0 base -> R3
|
||||
ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... video segment
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// ---- setup interpolator 0 to get tile index
|
||||
|
||||
// set tile map base to base2
|
||||
ldr r6,[r4,#SSEGM_DATA] // load tile map base
|
||||
str r6,[r3,#BASE2_OFFSET0] // set tile map base
|
||||
|
||||
// set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1
|
||||
ldr r6,RenderTilePersp_Ctrl // load control word
|
||||
ldrb r1,[r4,#SSEGM_PAR3] // get tile width and height (number of bits) -> R1
|
||||
str r1,[sp,#0] // save tile size -> [SP+0] (overwrites the saved R3 slot)
|
||||
adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required)
|
||||
ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2
|
||||
subs r5,r2,#1 // mapwbits - 1
|
||||
lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
|
||||
orrs r6,r5 // add to control word
|
||||
str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0
|
||||
|
||||
// set control word of lane 1: shift=FRACT+tilebits-mapwbits,
|
||||
// mask=mapwbits..mapwbits+maphbits-1 (derived from the lane 0 word still held in R6)
|
||||
subs r6,r2 // FRACT + tilebits - mapwbits
|
||||
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position
|
||||
orrs r6,r2 // add mapwbits to control word
|
||||
ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2
|
||||
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position
|
||||
adds r6,r2 // add to control word (mask MSB field already holds mapwbits-1)
|
||||
str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1
|
||||
|
||||
// ---- setup interpolator 1 to get pixel index
|
||||
|
||||
// set tile image to base2
|
||||
ldr r6,[r4,#SSEGM_PAR] // load tile image base
|
||||
str r6,[r3,#BASE2_OFFSET1] // set tile image base
|
||||
|
||||
// set control word of lane 0: shift=FRACT, mask=0..tilebits-1
|
||||
ldr r6,RenderTilePersp_Ctrl // load control word
|
||||
subs r5,r1,#1 // tilebits - 1
|
||||
lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
|
||||
orrs r6,r5 // add to control word
|
||||
str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0
|
||||
|
||||
// set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1
|
||||
subs r6,r1 // FRACT - tilebits
|
||||
lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position
|
||||
orrs r6,r5 // add tilebits to control word
|
||||
lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position
|
||||
adds r6,r1 // add to control word (mask MSB field already holds tilebits-1)
|
||||
str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... video segment
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// ---- set matrix
|
||||
|
||||
// get pointer to matrix -> R4
|
||||
ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4
|
||||
|
||||
// get distance coefficient dist -> R1 (result of the division started above)
|
||||
ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1
|
||||
ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient
|
||||
|
||||
// r4+0 ... m11
|
||||
// r4+4 ... m12
|
||||
// r4+8 ... m13
|
||||
// r4+12 ... m21
|
||||
// r4+16 ... m22
|
||||
// r4+20 ... m23
|
||||
|
||||
// set m11 -> R5 base0 (same value for both interpolators)
|
||||
ldr r5,[r4,#0] // load m11
|
||||
muls r5,r1 // m11*dist
|
||||
asrs r5,#FRACT // (m11*dist)>>FRACT
|
||||
str r5,[r3,#BASE0_OFFSET0] // set base0
|
||||
str r5,[r3,#BASE0_OFFSET1] // set base0
|
||||
|
||||
// set m21 -> R6 base1 (same value for both interpolators)
|
||||
ldr r6,[r4,#12] // load m21
|
||||
muls r6,r1 // m21*dist
|
||||
asrs r6,#FRACT // (m21*dist)>>FRACT
|
||||
str r6,[r3,#BASE1_OFFSET0] // set base1
|
||||
str r6,[r3,#BASE1_OFFSET1] // set base1
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R1 ... distance coefficient
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R5 ... m11 (scaled by dist)
|
||||
// R6 ... m21 (scaled by dist)
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// set x0*m11 + y0*m12 + m13 -> accum0 (m11 and m12 scaled by dist)
|
||||
mov r2,lr // start coordinate X0 -> R2
|
||||
muls r5,r2 // x0*m11 -> R5
|
||||
muls r2,r6 // x0*m21 -> R2
|
||||
mov lr,r1 // save distance coefficient -> LR
|
||||
ldr r6,[r4,#4] // load m12 -> R6
|
||||
muls r1,r6 // m12*dist -> R1
|
||||
asrs r1,#FRACT // (m12*dist)>>FRACT -> R1
|
||||
mov r6,r12 // load coordinate Y0 -> R6
|
||||
muls r1,r6 // y0*m12 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 -> R5
|
||||
ldr r1,[r4,#8] // load m13 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
|
||||
str r5,[r3,#ACCUM0_OFFSET0] // set accum0
|
||||
str r5,[r3,#ACCUM0_OFFSET1] // set accum0
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R2 ... x0*m21
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R6 ... current coordinate Y0
|
||||
// R7 ... width/4
|
||||
// LR ... distance coefficient
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// set x0*m21 + y0*m22 + m23 -> accum1 (m21 and m22 scaled by dist)
|
||||
ldr r1,[r4,#16] // load m22 -> R1
|
||||
mov r5,lr // distance coefficient -> R5
|
||||
muls r1,r5 // m22*dist
|
||||
asrs r1,#FRACT // (m22*dist)>>FRACT -> R1
|
||||
muls r1,r6 // y0*m22 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 -> R2
|
||||
ldr r1,[r4,#20] // load m23 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
|
||||
str r2,[r3,#ACCUM1_OFFSET0] // set accum1
|
||||
str r2,[r3,#ACCUM1_OFFSET1] // set accum1
|
||||
|
||||
// ---- process odd 4-pixel
|
||||
|
||||
// prepare tile bits * 2 (shift count that turns a tile index into a tile image offset)
|
||||
ldr r6,[sp,#0] // get tile bits
|
||||
lsls r6,#1 // tile bits * 2
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... (temporary - pixel accumulator 1)
|
||||
// R2 ... (temporary - pixel accumulator 2)
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to tile map, load tile index)
|
||||
// R5 ... (temporary - get pointer to pixel, load pixel)
|
||||
// R6 ... tilebits*2
|
||||
// R7 ... width/4 (loop counter)
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// check odd 4-pixels
|
||||
lsrs r7,#1 // width/4/2 -> R7, carry is set when the number of 4-pixels is odd
|
||||
bcc 2f // no odd 4-pixel
|
||||
|
||||
// [7] load 1st pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r1,[r5,r4] // [2] load pixel
|
||||
|
||||
// [9] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [9] load 3rd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [9] load 4th pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#24 // [1] shift 3 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [2] store 4 pixels
|
||||
stmia r0!,{r1} // [2] store 4 pixels
|
||||
|
||||
// check number of remaining pixels
|
||||
2: tst r7,r7 // check number of remaining 8-pixel groups
|
||||
beq 8f // end
|
||||
|
||||
// ---- [74 per 8 pixels] inner loop
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... (temporary - pixel accumulator 1)
|
||||
// R2 ... (temporary - pixel accumulator 2)
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to tile map, load tile index)
|
||||
// R5 ... (temporary - get pointer to pixel, load pixel)
|
||||
// R6 ... tilebits*2
|
||||
// R7 ... width/8 (loop counter)
|
||||
|
||||
// [7] load 1st pixel (lowest byte of accumulator 1)
|
||||
6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r1,[r5,r4] // [2] load pixel
|
||||
|
||||
// [9] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [9] load 3rd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [9] load 4th pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#24 // [1] shift 3 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [7] load 5th pixel (lowest byte of accumulator 2)
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r2,[r5,r4] // [2] load pixel
|
||||
|
||||
// [9] load 6th pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [9] load 7th pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [9] load 8th pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#24 // [1] shift 3 bytes left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [3] store 8 pixels
|
||||
stmia r0!,{r1,r2} // [3] store 8 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r7,#1 // [1] 8-pixel counter
|
||||
bne 6b // [1,2] next 8-pixels
|
||||
|
||||
// pop registers and return, R0 = new pointer to data buffer (R3 receives the tile bits stored at [SP+0])
|
||||
8: pop {r3-r7,pc}
|
||||
|
||||
// Literal pool of RenderTilePersp: constants fetched with PC-relative ldr instructions.
.align 2
|
||||
// pointer to SIO base (used to reach the hardware divider registers)
|
||||
RenderTilePersp_pSioBase:
|
||||
.word SIO_BASE // address of SIO base
|
||||
|
||||
// pointer to Interp0 base (all xxx_OFFSET0 and xxx_OFFSET1 offsets are relative to it)
|
||||
RenderTilePersp_Interp:
|
||||
.word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // address of interpolator 0 base
|
||||
|
||||
RenderTilePersp_Ctrl: // lane control word template: ADD_RAW bit plus shift = FRACT, mask fields are added by the code
|
||||
.word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP0_CTRL_LANE0_SHIFT_LSB)
|
||||
|
|
@ -1,433 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_TILEPERSP15
|
||||
//
|
||||
// ****************************************************************************
|
||||
// data ... tile map
|
||||
// par ... column of tile images
|
||||
// par2 ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL))
|
||||
// par3 ... LOW8=number of bits of tile width and height, HIGH8=horizon offset
|
||||
// wb ... LOW8=number of bits of tile map width, HIGH8=number of bits of tile map height
|
||||
// wrapy ... segment height
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
// Byte offsets of the interpolator registers, relative to the interpolator 0 base
// address that the render routine keeps in R3.
// Names ending in OFFSET0 are used by the code for interpolator 0.
#define ACCUM0_OFFSET0 0
|
||||
#define ACCUM1_OFFSET0 4
|
||||
#define BASE0_OFFSET0 8
|
||||
#define BASE1_OFFSET0 12
|
||||
#define BASE2_OFFSET0 16
|
||||
#define POP_LANE0_OFFSET0 20
|
||||
#define POP_LANE1_OFFSET0 24
|
||||
#define POP_FULL_OFFSET0 28
|
||||
#define PEEK_LANE0_OFFSET0 32
|
||||
#define PEEK_LANE1_OFFSET0 36
|
||||
#define PEEK_FULL_OFFSET0 40
|
||||
#define CTRL_LANE0_OFFSET0 44
|
||||
#define CTRL_LANE1_OFFSET0 48
|
||||
#define ACCUM0_ADD_OFFSET0 52
|
||||
#define ACCUM1_ADD_OFFSET0 56
|
||||
#define BASE_1AND0_OFFSET0 60
|
||||
|
||||
// Names ending in OFFSET1 are used by the code for interpolator 1
// (same register order, every offset 64 bytes higher).
#define ACCUM0_OFFSET1 64
|
||||
#define ACCUM1_OFFSET1 68
|
||||
#define BASE0_OFFSET1 72
|
||||
#define BASE1_OFFSET1 76
|
||||
#define BASE2_OFFSET1 80
|
||||
#define POP_LANE0_OFFSET1 84
|
||||
#define POP_LANE1_OFFSET1 88
|
||||
#define POP_FULL_OFFSET1 92
|
||||
#define PEEK_LANE0_OFFSET1 96
|
||||
#define PEEK_LANE1_OFFSET1 100
|
||||
#define PEEK_FULL_OFFSET1 104
|
||||
#define CTRL_LANE0_OFFSET1 108
|
||||
#define CTRL_LANE1_OFFSET1 112
|
||||
#define ACCUM0_ADD_OFFSET1 116
|
||||
#define ACCUM1_ADD_OFFSET1 120
|
||||
#define BASE_1AND0_OFFSET1 124
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" u32* RenderTilePersp15(u32* cbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render tiles with perspective GF_TILEPERSP15, 1.5 pixel
|
||||
// using hardware interpolators interp0 and interp1 (their state is not saved during an interrupt)
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate (not used)
|
||||
// R2 ... start Y coordinate (in graphics lines)
|
||||
// R3 ... width to display (must be multiple of 4)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to data buffer.
|
||||
// 320 pixels takes ?? us on 151 MHz.
|
||||
|
||||
.thumb_func
|
||||
.global RenderTilePersp15
|
||||
RenderTilePersp15:
|
||||
|
||||
// Input registers and stack:
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... X coordinate (not used)
|
||||
// R2 ... Y coordinate
|
||||
// SP+0: R3 ... remaining width
|
||||
// SP+4: R4
|
||||
// SP+8: R5
|
||||
// SP+12: R6
|
||||
// SP+16: R7
|
||||
// SP+20: LR
|
||||
// SP+24: video segment
|
||||
|
||||
// push registers
|
||||
push {r3-r7,lr}
|
||||
|
||||
// ---- prepare registers
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#24] // load video segment -> R4
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R2 ... Y coordinate
|
||||
// R3 ... remaining width
|
||||
// R4 ... video segment
|
||||
|
||||
// load horizon offset -> R1, check if use perspective
|
||||
ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6
|
||||
ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5
|
||||
ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1
|
||||
sxtb r1,r1 // signed extension
|
||||
lsls r1,#2 // horizon * 4, horizon = 0 ?
|
||||
bne 2f // use perspective
|
||||
|
||||
// not using perspective, start Y coordinate y0 = y - h/2 -> R12
|
||||
lsrs r5,#1 // segment height/2 -> R5
|
||||
subs r2,r5 // y - h/2 -> R2
|
||||
mov r12,r2 // current coordinate Y0 = y - h/2 -> R12
|
||||
|
||||
// prepare divide result to get 1<<FRACT
|
||||
movs r5,#1 // R5 <- 1
|
||||
str r5,[r6,#SIO_DIV_UDIVISOR_OFFSET] // divisor = 1
|
||||
lsls r5,#FRACT // constant 1<<FRACT -> R5
|
||||
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL
|
||||
b 4f
|
||||
|
||||
// using perspective, check ceilling mode
|
||||
2: bpl 3f // horizon is not negative
|
||||
subs r2,r5,r2 // negate, y = h - y
|
||||
subs r2,#1 // y = h - 1 - y
|
||||
negs r1,r1 // absolute value of horizon
|
||||
|
||||
// prepare current coordinate Y0 = y - h -> R12
|
||||
3: subs r7,r2,r5 // y - h = current Y coordinate -> R7
|
||||
mov r12,r7 // store current coordinate Y0 -> R12
|
||||
|
||||
// start calculating distance coefficient dist = FRACTMUL*h/(y + horiz)
|
||||
lsls r5,#FRACT // segment height * FRACTMUL -> R5
|
||||
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h
|
||||
adds r2,r1 // horizon + y -> R2
|
||||
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... remaining width
|
||||
// R4 ... video segment
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// prepare start coordinate X0 = -w/2 -> LR
|
||||
4: lsrs r5,r3,#1 // width/2
|
||||
negs r5,r5 // negate
|
||||
mov lr,r5 // store start coordinate X0 -> LR
|
||||
|
||||
// prepare number of 4-pixels (loop counter) -> R7
|
||||
lsrs r7,r3,#2 // width/4 -> R7
|
||||
|
||||
// prepare address of interpolator 0 base -> R3
|
||||
ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... video segment
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// ---- setup interpolator 0 to get tile index
|
||||
|
||||
// set tile map base to base2
|
||||
ldr r6,[r4,#SSEGM_DATA] // load tile map base
|
||||
str r6,[r3,#BASE2_OFFSET0] // set tile map base
|
||||
|
||||
// set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1
|
||||
ldr r6,RenderTilePersp_Ctrl // load control word
|
||||
ldrb r1,[r4,#SSEGM_PAR3] // get tile width and height -> R1
|
||||
str r1,[sp,#0] // save tile size -> [SP+0]
|
||||
adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required)
|
||||
ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2
|
||||
subs r5,r2,#1 // mapwbits - 1
|
||||
lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
|
||||
orrs r6,r5 // add to control word
|
||||
str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0
|
||||
|
||||
// set control word of lane 1: shift=FRACT+tilebits-mapwbits,
|
||||
// mask=mapwbits..mapwbits+maphbits-1
|
||||
subs r6,r2 // FRACT + tilebits - mapwbits
|
||||
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position
|
||||
orrs r6,r2 // add mapwbits to control word
|
||||
ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2
|
||||
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position
|
||||
adds r6,r2 // add to control word
|
||||
str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1
|
||||
|
||||
// ---- setup interpolator 1 to get pixel index
|
||||
|
||||
// set tile image to base2
|
||||
ldr r6,[r4,#SSEGM_PAR] // load tile image base
|
||||
str r6,[r3,#BASE2_OFFSET1] // set tile image base
|
||||
|
||||
// set control word of lane 0: shift=FRACT, mask=0..tilebits-1
|
||||
ldr r6,RenderTilePersp_Ctrl // load control word
|
||||
subs r5,r1,#1 // tilebits - 1
|
||||
lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
|
||||
orrs r6,r5 // add to control word
|
||||
str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0
|
||||
|
||||
// set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1
|
||||
subs r6,r1 // FRACT - tilebits
|
||||
lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position
|
||||
orrs r6,r5 // add tilebits to control word
|
||||
lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position
|
||||
adds r6,r1 // add to control word
|
||||
str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... video segment
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// ---- set matrix
|
||||
|
||||
// get pointer to matrix -> R4
|
||||
ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4
|
||||
|
||||
// get distance coefficient dist -> R1
|
||||
ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1
|
||||
ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient
|
||||
|
||||
// r4+0 ... m11
|
||||
// r4+4 ... m12
|
||||
// r4+8 ... m13
|
||||
// r4+12 ... m21
|
||||
// r4+16 ... m22
|
||||
// r4+20 ... m23
|
||||
|
||||
// set m11 -> R5 base0
|
||||
ldr r5,[r4,#0] // load m11
|
||||
muls r5,r1 // m11*dist
|
||||
asrs r5,#FRACT // (m11*dist)>>FRACT
|
||||
asrs r2,r5,#1 // delta/2
|
||||
adds r2,r5 // delta*1.5
|
||||
str r2,[r3,#BASE0_OFFSET0] // set base0
|
||||
str r2,[r3,#BASE0_OFFSET1] // set base0
|
||||
|
||||
// set m21 -> R6 base1
|
||||
ldr r6,[r4,#12] // load m21
|
||||
muls r6,r1 // m21*dist
|
||||
asrs r6,#FRACT // (m21*dist)>>FRACT
|
||||
asrs r2,r6,#1 // delta/2
|
||||
adds r2,r6 // delta*1.5
|
||||
str r2,[r3,#BASE1_OFFSET0] // set base1
|
||||
str r2,[r3,#BASE1_OFFSET1] // set base1
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R1 ... distance coefficient
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R5 ... m11
|
||||
// R6 ... m21
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// set x0*m11 + y0*m12 + m13 -> accum0
|
||||
mov r2,lr // start coordinate X0 -> X2
|
||||
muls r5,r2 // x0*m11 -> R5
|
||||
muls r2,r6 // x0*m21 -> R2
|
||||
mov lr,r1 // save distance coefficient -> LR
|
||||
ldr r6,[r4,#4] // load m12 -> R6
|
||||
muls r1,r6 // m12*dist -> R1
|
||||
asrs r1,#FRACT // (m12*dist)>>FRACT -> R1
|
||||
mov r6,r12 // load coordinate Y0 -> R6
|
||||
muls r1,r6 // y0*m12 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 -> R5
|
||||
ldr r1,[r4,#8] // load m13 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
|
||||
str r5,[r3,#ACCUM0_OFFSET0] // set accum0
|
||||
str r5,[r3,#ACCUM0_OFFSET1] // set accum0
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R2 ... x0*m21
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R6 ... current coordinate Y0
|
||||
// R7 ... width/4
|
||||
// LR ... distance coefficient
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// set x0*m21 + y0*m22 + m23 -> accum1
|
||||
ldr r1,[r4,#16] // load m22 -> R1
|
||||
mov r5,lr // distance coefficient -> R5
|
||||
muls r1,r5 // m22*dist
|
||||
asrs r1,#FRACT // (m22*dist)>>FRACT -> R1
|
||||
muls r1,r6 // y0*m22 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 -> R2
|
||||
ldr r1,[r4,#20] // load m23 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
|
||||
str r2,[r3,#ACCUM1_OFFSET0] // set accum1
|
||||
str r2,[r3,#ACCUM1_OFFSET1] // set accum1
|
||||
|
||||
// ---- process odd 4-pixel
|
||||
|
||||
// prepare tile bits * 2
|
||||
ldr r6,[sp,#0] // get tile bits
|
||||
lsls r6,#1 // tile bits * 2
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... (temporary - pixel accumulator 1)
|
||||
// R2 ... (temporary - pixel accumulator 2)
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to tile map, load tile index)
|
||||
// R5 ... (temporary - get pointer to pixel, load pixel)
|
||||
// R6 ... tilebits*2
|
||||
// R7 ... width/4 (loop counter)
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// check odd 4-pixels
|
||||
lsrs r7,#1 // width/4/2
|
||||
bcc 2f // no odd 4-pixel
|
||||
|
||||
// [7] load 1st pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r1,[r5,r4] // [2] load pixel
|
||||
|
||||
// [9] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [11] load 3rd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [2] store 4 pixels
|
||||
stmia r0!,{r1} // [2] store 4 pixels
|
||||
|
||||
// check number of remaining pixels
|
||||
2: tst r7,r7 // check number of pixels
|
||||
beq 8f // end
|
||||
|
||||
// ---- [60 per 8 pixels] inner loop
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... (temporary - pixel accumulator 1)
|
||||
// R2 ... (temporary - pixel accumulator 2)
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to tile map, load tile index)
|
||||
// R5 ... (temporary - get pointer to pixel, load pixel)
|
||||
// R6 ... tilebits*2
|
||||
// R7 ... width/8 (loop counter)
|
||||
|
||||
// [7] load 1st pixel
|
||||
6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r1,[r5,r4] // [2] load pixel
|
||||
|
||||
// [9] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [11] load 3rd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [7] load 1st pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r2,[r5,r4] // [2] load pixel
|
||||
|
||||
// [9] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [11] load 3rd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [3] store 8 pixels
|
||||
stmia r0!,{r1,r2} // [3] store 8 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r7,#1 // [1] 8-pixel counter
|
||||
bne 6b // [1,2] next 8-pixels
|
||||
|
||||
// pop registers
|
||||
8: pop {r3-r7,pc}
|
||||
|
||||
.align 2
|
||||
// pointer to SIO base
|
||||
RenderTilePersp_pSioBase:
|
||||
.word SIO_BASE // address of SIO base
|
||||
|
||||
// pointer to Interp0 base
|
||||
RenderTilePersp_Interp:
|
||||
.word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // address of interpolator 0 base
|
||||
|
||||
RenderTilePersp_Ctrl: // lane control word template: ADD_RAW set, shift field = FRACT
|
||||
.word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP0_CTRL_LANE0_SHIFT_LSB)
|
||||
|
|
@ -1,410 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_TILEPERSP2
|
||||
//
|
||||
// ****************************************************************************
|
||||
// data ... tile map
|
||||
// par ... column of tile images
|
||||
// par2 ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL))
|
||||
// par3 ... LOW8=number of bits of tile width and height, HIGH8=horizon offset
|
||||
// wb ... LOW8=number of bits of tile map width, HIGH8=number of bits of tile map height
|
||||
// wrapy ... segment height
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
// Byte offsets of the interpolator registers, used with the interpolator 0 base
// address (SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET) as the base register.
// Names ending in _OFFSET0 address interpolator 0.
#define ACCUM0_OFFSET0 0
|
||||
#define ACCUM1_OFFSET0 4
|
||||
#define BASE0_OFFSET0 8
|
||||
#define BASE1_OFFSET0 12
|
||||
#define BASE2_OFFSET0 16
|
||||
#define POP_LANE0_OFFSET0 20
|
||||
#define POP_LANE1_OFFSET0 24
|
||||
#define POP_FULL_OFFSET0 28
|
||||
#define PEEK_LANE0_OFFSET0 32
|
||||
#define PEEK_LANE1_OFFSET0 36
|
||||
#define PEEK_FULL_OFFSET0 40
|
||||
#define CTRL_LANE0_OFFSET0 44
|
||||
#define CTRL_LANE1_OFFSET0 48
|
||||
#define ACCUM0_ADD_OFFSET0 52
|
||||
#define ACCUM1_ADD_OFFSET0 56
|
||||
#define BASE_1AND0_OFFSET0 60
|
||||
|
||||
// Names ending in _OFFSET1 address interpolator 1: the same register layout,
// 64 bytes above the matching _OFFSET0 value.
#define ACCUM0_OFFSET1 64
|
||||
#define ACCUM1_OFFSET1 68
|
||||
#define BASE0_OFFSET1 72
|
||||
#define BASE1_OFFSET1 76
|
||||
#define BASE2_OFFSET1 80
|
||||
#define POP_LANE0_OFFSET1 84
|
||||
#define POP_LANE1_OFFSET1 88
|
||||
#define POP_FULL_OFFSET1 92
|
||||
#define PEEK_LANE0_OFFSET1 96
|
||||
#define PEEK_LANE1_OFFSET1 100
|
||||
#define PEEK_FULL_OFFSET1 104
|
||||
#define CTRL_LANE0_OFFSET1 108
|
||||
#define CTRL_LANE1_OFFSET1 112
|
||||
#define ACCUM0_ADD_OFFSET1 116
|
||||
#define ACCUM1_ADD_OFFSET1 120
|
||||
#define BASE_1AND0_OFFSET1 124
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" u32* RenderTilePersp2(u32* cbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render tiles with perspective GF_TILEPERSP2, double pixels
|
||||
// using hardware interpolator inter0 and inter1 (their state is not saved during interrupt)
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate (not used)
|
||||
// R2 ... start Y coordinate (in graphics lines)
|
||||
// R3 ... width to display (must be multiple of 4)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to data buffer.
|
||||
// 320 pixels takes ?? us on 151 MHz.
|
||||
|
||||
// RenderTilePersp2: renders `width` pixels of a perspective-projected tile map for one
// Y coordinate. Every sampled source pixel is written twice, so each stored 32-bit
// word holds 2 source pixels. Uses the hardware divider and both interpolators.
.thumb_func
|
||||
.global RenderTilePersp2
|
||||
RenderTilePersp2:
|
||||
|
||||
// Input registers and stack (stack offsets are valid after the push below):
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... X coordinate (not used)
|
||||
// R2 ... Y coordinate
|
||||
// SP+0: R3 ... remaining width
|
||||
// SP+4: R4
|
||||
// SP+8: R5
|
||||
// SP+12: R6
|
||||
// SP+16: R7
|
||||
// SP+20: LR
|
||||
// SP+24: video segment
|
||||
|
||||
// push registers
|
||||
push {r3-r7,lr}
|
||||
|
||||
// ---- prepare registers
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#24] // load video segment -> R4 (5th argument, above the 6 pushed words)
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R2 ... Y coordinate
|
||||
// R3 ... remaining width
|
||||
// R4 ... video segment
|
||||
|
||||
// load horizon offset -> R1, check if use perspective
|
||||
ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6
|
||||
ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5
|
||||
ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1
|
||||
sxtb r1,r1 // signed extension
|
||||
lsls r1,#2 // horizon * 4, horizon = 0 ? (sets Z and N flags used below)
|
||||
bne 2f // use perspective
|
||||
|
||||
// not using perspective, start Y coordinate y0 = y - h/2 -> R12
|
||||
lsrs r5,#1 // segment height/2 -> R5
|
||||
subs r2,r5 // y - h/2 -> R2
|
||||
mov r12,r2 // current coordinate Y0 = y - h/2 -> R12
|
||||
|
||||
// prepare divide result to get 1<<FRACT
|
||||
movs r5,#1 // R5 <- 1
|
||||
str r5,[r6,#SIO_DIV_UDIVISOR_OFFSET] // divisor = 1
|
||||
lsls r5,#FRACT // constant 1<<FRACT -> R5
|
||||
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL
|
||||
b 4f
|
||||
|
||||
// using perspective, check ceiling mode
|
||||
2: bpl 3f // horizon is not negative (flags still from the LSLS above)
|
||||
subs r2,r5,r2 // negate, y = h - y
|
||||
subs r2,#1 // y = h - 1 - y
|
||||
negs r1,r1 // absolute value of horizon
|
||||
|
||||
// prepare current coordinate Y0 = y - h -> R12
|
||||
3: subs r7,r2,r5 // y - h = current Y coordinate -> R7
|
||||
mov r12,r7 // store current coordinate Y0 -> R12
|
||||
|
||||
// start calculating distance coefficient dist = FRACTMUL*h/(y + horiz)
|
||||
// (the quotient is read back later, just before the matrix setup)
|
||||
lsls r5,#FRACT // segment height * FRACTMUL -> R5
|
||||
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h
|
||||
adds r2,r1 // horizon + y -> R2
|
||||
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... remaining width
|
||||
// R4 ... video segment
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// prepare start coordinate X0 = -w/2 -> LR
|
||||
4: lsrs r5,r3,#1 // width/2
|
||||
negs r5,r5 // negate
|
||||
mov lr,r5 // store start coordinate X0 -> LR
|
||||
|
||||
// prepare number of 4-pixels (loop counter) -> R7
|
||||
lsrs r7,r3,#2 // width/4 -> R7
|
||||
|
||||
// prepare address of interpolator 0 base -> R3
|
||||
ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... video segment
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// ---- setup interpolator 0 to get tile index
|
||||
|
||||
// set tile map base to base2
|
||||
ldr r6,[r4,#SSEGM_DATA] // load tile map base
|
||||
str r6,[r3,#BASE2_OFFSET0] // set tile map base
|
||||
|
||||
// set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1
|
||||
ldr r6,RenderTilePersp_Ctrl // load control word
|
||||
ldrb r1,[r4,#SSEGM_PAR3] // get number of bits of tile width and height (tilebits) -> R1
|
||||
str r1,[sp,#0] // save tilebits -> [SP+0] (reuses the slot of the pushed R3)
|
||||
adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required)
|
||||
ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2
|
||||
subs r5,r2,#1 // mapwbits - 1
|
||||
lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
|
||||
orrs r6,r5 // add to control word
|
||||
str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0
|
||||
|
||||
// set control word of lane 1: shift=FRACT+tilebits-mapwbits,
|
||||
// mask=mapwbits..mapwbits+maphbits-1
|
||||
subs r6,r2 // FRACT + tilebits - mapwbits
|
||||
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position
|
||||
orrs r6,r2 // add mapwbits to control word
|
||||
ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2
|
||||
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position
|
||||
adds r6,r2 // add to the lane 0 MSB (mapwbits-1) already in R6 -> mapwbits+maphbits-1
|
||||
str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1
|
||||
|
||||
// ---- setup interpolator 1 to get pixel index
|
||||
|
||||
// set tile image to base2
|
||||
ldr r6,[r4,#SSEGM_PAR] // load tile image base
|
||||
str r6,[r3,#BASE2_OFFSET1] // set tile image base
|
||||
|
||||
// set control word of lane 0: shift=FRACT, mask=0..tilebits-1
|
||||
ldr r6,RenderTilePersp_Ctrl // load control word
|
||||
subs r5,r1,#1 // tilebits - 1
|
||||
lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
|
||||
orrs r6,r5 // add to control word
|
||||
str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0
|
||||
|
||||
// set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1
|
||||
subs r6,r1 // FRACT - tilebits
|
||||
lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position
|
||||
orrs r6,r5 // add tilebits to control word
|
||||
lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position
|
||||
adds r6,r1 // add to the lane 0 MSB (tilebits-1) already in R6 -> tilebits*2-1
|
||||
str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... video segment
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// ---- set matrix
|
||||
|
||||
// get pointer to matrix -> R4
|
||||
ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4
|
||||
|
||||
// get distance coefficient dist -> R1
|
||||
ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1
|
||||
ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient -> R1, distance coefficient (division started above)
|
||||
|
||||
// r4+0 ... m11
|
||||
// r4+4 ... m12
|
||||
// r4+8 ... m13
|
||||
// r4+12 ... m21
|
||||
// r4+16 ... m22
|
||||
// r4+20 ... m23
|
||||
|
||||
// set m11 -> R5 base0
|
||||
ldr r5,[r4,#0] // load m11
|
||||
muls r5,r1 // m11*dist
|
||||
asrs r5,#FRACT-1 // (m11*dist)>>(FRACT-1) ... 2*delta
|
||||
str r5,[r3,#BASE0_OFFSET0] // set base0 of interpolator 0
|
||||
str r5,[r3,#BASE0_OFFSET1] // set base0 of interpolator 1
|
||||
asrs r5,#1 // (m11*dist)>>FRACT
|
||||
|
||||
// set m21 -> R6 base1
|
||||
ldr r6,[r4,#12] // load m21
|
||||
muls r6,r1 // m21*dist
|
||||
asrs r6,#FRACT-1 // (m21*dist)>>(FRACT-1) ... 2*delta
|
||||
str r6,[r3,#BASE1_OFFSET0] // set base1 of interpolator 0
|
||||
str r6,[r3,#BASE1_OFFSET1] // set base1 of interpolator 1
|
||||
asrs r6,#1 // (m21*dist)>>FRACT
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R1 ... distance coefficient
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R5 ... (m11*dist)>>FRACT
|
||||
// R6 ... (m21*dist)>>FRACT
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// set x0*m11 + y0*m12 + m13 -> accum0
|
||||
mov r2,lr // start coordinate X0 -> R2
|
||||
muls r5,r2 // x0*m11 -> R5
|
||||
muls r2,r6 // x0*m21 -> R2
|
||||
mov lr,r1 // save distance coefficient -> LR
|
||||
ldr r6,[r4,#4] // load m12 -> R6
|
||||
muls r1,r6 // m12*dist -> R1
|
||||
asrs r1,#FRACT // (m12*dist)>>FRACT -> R1
|
||||
mov r6,r12 // load coordinate Y0 -> R6
|
||||
muls r1,r6 // y0*m12 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 -> R5
|
||||
ldr r1,[r4,#8] // load m13 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
|
||||
str r5,[r3,#ACCUM0_OFFSET0] // set accum0 of interpolator 0
|
||||
str r5,[r3,#ACCUM0_OFFSET1] // set accum0 of interpolator 1
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R2 ... x0*m21
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R6 ... current coordinate Y0
|
||||
// R7 ... width/4
|
||||
// LR ... distance coefficient
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// set x0*m21 + y0*m22 + m23 -> accum1
|
||||
ldr r1,[r4,#16] // load m22 -> R1
|
||||
mov r5,lr // distance coefficient -> R5
|
||||
muls r1,r5 // m22*dist
|
||||
asrs r1,#FRACT // (m22*dist)>>FRACT -> R1
|
||||
muls r1,r6 // y0*m22 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 -> R2
|
||||
ldr r1,[r4,#20] // load m23 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
|
||||
str r2,[r3,#ACCUM1_OFFSET0] // set accum1 of interpolator 0
|
||||
str r2,[r3,#ACCUM1_OFFSET1] // set accum1 of interpolator 1
|
||||
|
||||
// ---- process odd 4-pixel
|
||||
|
||||
// prepare tile bits * 2
|
||||
ldr r6,[sp,#0] // get tile bits
|
||||
lsls r6,#1 // tile bits * 2
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... (temporary - pixel accumulator 1)
|
||||
// R2 ... (temporary - pixel accumulator 2)
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to tile map, load tile index)
|
||||
// R5 ... (temporary - get pointer to pixel, load pixel)
|
||||
// R6 ... tilebits*2
|
||||
// R7 ... width/4 (loop counter)
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// check odd 4-pixels
|
||||
lsrs r7,#1 // width/4/2, carry = lowest bit of width/4
|
||||
bcc 2f // no odd 4-pixel
|
||||
|
||||
// [9] load 1st pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r1,[r5,r4] // [2] load pixel
|
||||
lsls r4,r1,#8 // [1] copy of pixel shifted 1 byte left
|
||||
orrs r1,r4 // [1] pixel now in bytes 0 and 1 of accumulator
|
||||
|
||||
// [11] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator (byte 2)
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator (byte 3)
|
||||
|
||||
// [2] store 4 pixels
|
||||
stmia r0!,{r1} // [2] store 4 pixels
|
||||
|
||||
// check number of remaining pixels
|
||||
2: tst r7,r7 // check number of pixels
|
||||
beq 8f // end
|
||||
|
||||
// ---- [46 per 8 pixels] inner loop
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... (temporary - pixel accumulator 1)
|
||||
// R2 ... (temporary - pixel accumulator 2)
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to tile map, load tile index)
|
||||
// R5 ... (temporary - get pointer to pixel, load pixel)
|
||||
// R6 ... tilebits*2
|
||||
// R7 ... width/8 (loop counter)
|
||||
|
||||
// [9] load 1st pixel
|
||||
6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r1,[r5,r4] // [2] load pixel
|
||||
lsls r4,r1,#8 // [1] copy of pixel shifted 1 byte left
|
||||
orrs r1,r4 // [1] pixel now in bytes 0 and 1 of accumulator 1
|
||||
|
||||
// [11] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator 1 (byte 2)
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator 1 (byte 3)
|
||||
|
||||
// [9] load 3rd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r2,[r5,r4] // [2] load pixel
|
||||
lsls r4,r2,#8 // [1] copy of pixel shifted 1 byte left
|
||||
orrs r2,r4 // [1] pixel now in bytes 0 and 1 of accumulator 2
|
||||
|
||||
// [11] load 4th pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r2,r4 // [1] add pixel to accumulator 2 (byte 2)
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r2,r4 // [1] add pixel to accumulator 2 (byte 3)
|
||||
|
||||
// [3] store 8 pixels
|
||||
stmia r0!,{r1,r2} // [3] store 8 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r7,#1 // [1] 8-pixel counter
|
||||
bne 6b // [1,2] next 8-pixels
|
||||
|
||||
// pop registers; R0 holds the advanced destination pointer on return
|
||||
8: pop {r3-r7,pc}
|
||||
|
||||
.align 2
|
||||
// pointer to SIO base
|
||||
RenderTilePersp_pSioBase:
|
||||
.word SIO_BASE // address of SIO base
|
||||
|
||||
// pointer to Interp0 base
|
||||
RenderTilePersp_Interp:
|
||||
.word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // address of interpolator 0 base
|
||||
|
||||
RenderTilePersp_Ctrl: // lane control word template: ADD_RAW set, shift field = FRACT
|
||||
.word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP0_CTRL_LANE0_SHIFT_LSB)
|
||||
|
|
@ -1,394 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_TILEPERSP3
|
||||
//
|
||||
// ****************************************************************************
|
||||
// data ... tile map
|
||||
// par ... column of tile images
|
||||
// par2 ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL))
|
||||
// par3 ... LOW8=number of bits of tile width and height, HIGH8=horizon offset
|
||||
// wb ... LOW8=number of bits of tile map width, HIGH8=number of bits of tile map height
|
||||
// wrapy ... segment height
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
// Byte offsets of the interpolator registers, used with the interpolator 0 base
// address (SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET) as the base register.
// Names ending in _OFFSET0 address interpolator 0.
#define ACCUM0_OFFSET0 0
|
||||
#define ACCUM1_OFFSET0 4
|
||||
#define BASE0_OFFSET0 8
|
||||
#define BASE1_OFFSET0 12
|
||||
#define BASE2_OFFSET0 16
|
||||
#define POP_LANE0_OFFSET0 20
|
||||
#define POP_LANE1_OFFSET0 24
|
||||
#define POP_FULL_OFFSET0 28
|
||||
#define PEEK_LANE0_OFFSET0 32
|
||||
#define PEEK_LANE1_OFFSET0 36
|
||||
#define PEEK_FULL_OFFSET0 40
|
||||
#define CTRL_LANE0_OFFSET0 44
|
||||
#define CTRL_LANE1_OFFSET0 48
|
||||
#define ACCUM0_ADD_OFFSET0 52
|
||||
#define ACCUM1_ADD_OFFSET0 56
|
||||
#define BASE_1AND0_OFFSET0 60
|
||||
|
||||
// Names ending in _OFFSET1 address interpolator 1: the same register layout,
// 64 bytes above the matching _OFFSET0 value.
#define ACCUM0_OFFSET1 64
|
||||
#define ACCUM1_OFFSET1 68
|
||||
#define BASE0_OFFSET1 72
|
||||
#define BASE1_OFFSET1 76
|
||||
#define BASE2_OFFSET1 80
|
||||
#define POP_LANE0_OFFSET1 84
|
||||
#define POP_LANE1_OFFSET1 88
|
||||
#define POP_FULL_OFFSET1 92
|
||||
#define PEEK_LANE0_OFFSET1 96
|
||||
#define PEEK_LANE1_OFFSET1 100
|
||||
#define PEEK_FULL_OFFSET1 104
|
||||
#define CTRL_LANE0_OFFSET1 108
|
||||
#define CTRL_LANE1_OFFSET1 112
|
||||
#define ACCUM0_ADD_OFFSET1 116
|
||||
#define ACCUM1_ADD_OFFSET1 120
|
||||
#define BASE_1AND0_OFFSET1 124
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" u32* RenderTilePersp3(u32* cbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render tiles with perspective GF_TILEPERSP3, triple pixels
|
||||
// using hardware interpolator inter0 and inter1 (their state is not saved during interrupt)
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate (not used)
|
||||
// R2 ... start Y coordinate (in graphics lines)
|
||||
// R3 ... width to display (must be multiple of 4)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to data buffer.
|
||||
// 320 pixels takes ?? us on 151 MHz.
|
||||
|
||||
.thumb_func
|
||||
.global RenderTilePersp3
|
||||
RenderTilePersp3:
|
||||
|
||||
// Input registers and stack:
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... X coordinate (not used)
|
||||
// R2 ... Y coordinate
|
||||
// SP+0: R3 ... remaining width
|
||||
// SP+4: R4
|
||||
// SP+8: R5
|
||||
// SP+12: R6
|
||||
// SP+16: R7
|
||||
// SP+20: LR
|
||||
// SP+24: video segment
|
||||
|
||||
// push registers
|
||||
push {r3-r7,lr}
|
||||
|
||||
// ---- prepare registers
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#24] // load video segment -> R4
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R2 ... Y coordinate
|
||||
// R3 ... remaining width
|
||||
// R4 ... video segment
|
||||
|
||||
// load horizon offset -> R1, check if use perspective
|
||||
ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6
|
||||
ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5
|
||||
ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1
|
||||
sxtb r1,r1 // signed extension
|
||||
lsls r1,#2 // horizon * 4, horizon = 0 ?
|
||||
bne 2f // use perspective
|
||||
|
||||
// not using perspective, start Y coordinate y0 = y - h/2 -> R12
|
||||
lsrs r5,#1 // segment height/2 -> R5
|
||||
subs r2,r5 // y - h/2 -> R2
|
||||
mov r12,r2 // current coordinate Y0 = y - h/2 -> R12
|
||||
|
||||
// prepare divide result to get 1<<FRACT
|
||||
movs r5,#1 // R5 <- 1
|
||||
str r5,[r6,#SIO_DIV_UDIVISOR_OFFSET] // divisor = 1
|
||||
lsls r5,#FRACT // constant 1<<FRACT -> R5
|
||||
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL
|
||||
b 4f
|
||||
|
||||
// using perspective, check ceilling mode
|
||||
2: bpl 3f // horizon is not negative
|
||||
subs r2,r5,r2 // negate, y = h - y
|
||||
subs r2,#1 // y = h - 1 - y
|
||||
negs r1,r1 // absolute value of horizon
|
||||
|
||||
// prepare current coordinate Y0 = y - h -> R12
|
||||
3: subs r7,r2,r5 // y - h = current Y coordinate -> R7
|
||||
mov r12,r7 // store current coordinate Y0 -> R12
|
||||
|
||||
// start calculating distance coefficient dist = FRACTMUL*h/(y + horiz)
|
||||
lsls r5,#FRACT // segment height * FRACTMUL -> R5
|
||||
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h
|
||||
adds r2,r1 // horizon + y -> R2
|
||||
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... remaining width
|
||||
// R4 ... video segment
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// prepare start coordinate X0 = -w/2 -> LR
|
||||
4: lsrs r5,r3,#1 // width/2
|
||||
negs r5,r5 // negate
|
||||
mov lr,r5 // store start coordinate X0 -> LR
|
||||
|
||||
// prepare number of 4-pixels (loop counter) -> R7
|
||||
lsrs r7,r3,#2 // width/4 -> R7
|
||||
|
||||
// prepare address of interpolator 0 base -> R3
|
||||
ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... video segment
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// ---- setup interpolator 0 to get tile index
|
||||
|
||||
// set tile map base to base2
|
||||
ldr r6,[r4,#SSEGM_DATA] // load tile map base
|
||||
str r6,[r3,#BASE2_OFFSET0] // set tile map base
|
||||
|
||||
// set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1
|
||||
ldr r6,RenderTilePersp_Ctrl // load control word
|
||||
ldrb r1,[r4,#SSEGM_PAR3] // get tile width and height -> R1
|
||||
str r1,[sp,#0] // save tile size -> [SP+0]
|
||||
adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required)
|
||||
ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2
|
||||
subs r5,r2,#1 // mapwbits - 1
|
||||
lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
|
||||
orrs r6,r5 // add to control word
|
||||
str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0
|
||||
|
||||
// set control word of lane 1: shift=FRACT+tilebits-mapwbits,
|
||||
// mask=mapwbits..mapwbits+maphbits-1
|
||||
subs r6,r2 // FRACT + tilebits - mapwbits
|
||||
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position
|
||||
orrs r6,r2 // add mapwbits to control word
|
||||
ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2
|
||||
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position
|
||||
adds r6,r2 // add to control word
|
||||
str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1
|
||||
|
||||
// ---- setup interpolator 1 to get pixel index
|
||||
|
||||
// set tile image to base2
|
||||
ldr r6,[r4,#SSEGM_PAR] // load tile image base
|
||||
str r6,[r3,#BASE2_OFFSET1] // set tile image base
|
||||
|
||||
// set control word of lane 0: shift=FRACT, mask=0..tilebits-1
|
||||
ldr r6,RenderTilePersp_Ctrl // load control word
|
||||
subs r5,r1,#1 // tilebits - 1
|
||||
lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
|
||||
orrs r6,r5 // add to control word
|
||||
str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0
|
||||
|
||||
// set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1
|
||||
subs r6,r1 // FRACT - tilebits
|
||||
lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position
|
||||
orrs r6,r5 // add tilebits to control word
|
||||
lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position
|
||||
adds r6,r1 // add to control word
|
||||
str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... video segment
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// ---- set matrix
|
||||
|
||||
// get pointer to matrix -> R4
|
||||
ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4
|
||||
|
||||
// get distance coefficient dist -> R1
|
||||
ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1
|
||||
ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient
|
||||
|
||||
// r4+0 ... m11
|
||||
// r4+4 ... m12
|
||||
// r4+8 ... m13
|
||||
// r4+12 ... m21
|
||||
// r4+16 ... m22
|
||||
// r4+20 ... m23
|
||||
|
||||
// set m11 -> R5 base0
|
||||
ldr r5,[r4,#0] // load m11
|
||||
muls r5,r1 // m11*dist
|
||||
asrs r5,#FRACT // (m11*dist)>>FRACT ... delta
|
||||
lsls r2,r5,#1 // delta*2
|
||||
adds r2,r5 // delta*3
|
||||
str r2,[r3,#BASE0_OFFSET0] // set base0
|
||||
str r2,[r3,#BASE0_OFFSET1] // set base0
|
||||
|
||||
// set m21 -> R6 base1
|
||||
ldr r6,[r4,#12] // load m21
|
||||
muls r6,r1 // m21*dist
|
||||
asrs r6,#FRACT // (m21*dist)>>FRACT ... delta
|
||||
lsls r2,r6,#1 // delta*2
|
||||
adds r2,r6 // delta*3
|
||||
str r2,[r3,#BASE1_OFFSET0] // set base1
|
||||
str r2,[r3,#BASE1_OFFSET1] // set base1
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R1 ... distance coefficient
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R5 ... m11
|
||||
// R6 ... m21
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// set x0*m11 + y0*m12 + m13 -> accum0
|
||||
mov r2,lr // start coordinate X0 -> X2
|
||||
muls r5,r2 // x0*m11 -> R5
|
||||
muls r2,r6 // x0*m21 -> R2
|
||||
mov lr,r1 // save distance coefficient -> LR
|
||||
ldr r6,[r4,#4] // load m12 -> R6
|
||||
muls r1,r6 // m12*dist -> R1
|
||||
asrs r1,#FRACT // (m12*dist)>>FRACT -> R1
|
||||
mov r6,r12 // load coordinate Y0 -> R6
|
||||
muls r1,r6 // y0*m12 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 -> R5
|
||||
ldr r1,[r4,#8] // load m13 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
|
||||
str r5,[r3,#ACCUM0_OFFSET0] // set accum0
|
||||
str r5,[r3,#ACCUM0_OFFSET1] // set accum0
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R2 ... x0*m21
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R6 ... current coordinate Y0
|
||||
// R7 ... width/4
|
||||
// LR ... distance coefficient
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// set x0*m21 + y0*m22 + m23 -> accum1
|
||||
ldr r1,[r4,#16] // load m22 -> R1
|
||||
mov r5,lr // distance coefficient -> R5
|
||||
muls r1,r5 // m22*dist
|
||||
asrs r1,#FRACT // (m22*dist)>>FRACT -> R1
|
||||
muls r1,r6 // y0*m22 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 -> R2
|
||||
ldr r1,[r4,#20] // load m23 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
|
||||
str r2,[r3,#ACCUM1_OFFSET0] // set accum1
|
||||
str r2,[r3,#ACCUM1_OFFSET1] // set accum1
|
||||
|
||||
// ---- process odd 4-pixel
|
||||
|
||||
// prepare tile bits * 2
|
||||
ldr r6,[sp,#0] // get tile bits
|
||||
lsls r6,#1 // tile bits * 2
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... (temporary - pixel accumulator 1)
|
||||
// R2 ... (temporary - pixel accumulator 2)
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to tile map, load tile index)
|
||||
// R5 ... (temporary - get pointer to pixel, load pixel)
|
||||
// R6 ... tilebits*2
|
||||
// R7 ... width/4 (loop counter)
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// check odd 4-pixels
|
||||
lsrs r7,#1 // width/4/2
|
||||
bcc 2f // no odd 4-pixel
|
||||
|
||||
// load pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r1,[r5,r4] // [2] load pixel
|
||||
lsls r4,r1,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
lsls r4,r1,#16 // [1] shift 2 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [2] store 4 pixels
|
||||
stmia r0!,{r1} // [2] store 4 pixels
|
||||
|
||||
// check number of remaining pixels
|
||||
2: tst r7,r7 // check number of pixels
|
||||
beq 8f // end
|
||||
|
||||
// ---- [37 per 8 pixels] inner loop
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... (temporary - pixel accumulator 1)
|
||||
// R2 ... (temporary - pixel accumulator 2)
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to tile map, load tile index)
|
||||
// R5 ... (temporary - get pointer to pixel, load pixel)
|
||||
// R6 ... tilebits*2
|
||||
// R7 ... width/8 (loop counter)
|
||||
|
||||
// [9] load 1st pixel
|
||||
6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r1,[r5,r4] // [2] load pixel
|
||||
lsls r4,r1,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [11] load 2nd pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r4,[r5,r4] // [2] load pixel
|
||||
lsls r4,#16 // [1] shift 2 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
lsls r4,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [11] load pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r2,[r5,r4] // [2] load pixel
|
||||
lsls r4,r2,#8 // [1] shift 1 byte left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
lsls r4,r2,#16 // [1] shift 2 bytes left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [3] store 8 pixels
|
||||
stmia r0!,{r1,r2} // [3] store 8 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r7,#1 // [1] 8-pixel counter
|
||||
bne 6b // [1,2] next 8-pixels
|
||||
|
||||
// pop registers
|
||||
8: pop {r3-r7,pc}
|
||||
|
||||
.align 2
|
||||
// pointer to SIO base
|
||||
RenderTilePersp_pSioBase:
|
||||
.word SIO_BASE // address of SIO base
|
||||
|
||||
// pointer to Interp0 base
|
||||
RenderTilePersp_Interp:
|
||||
.word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // address of interpolator 0 base
|
||||
|
||||
RenderTilePersp_Ctrl: // lane control word
|
||||
.word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP0_CTRL_LANE0_SHIFT_LSB)
|
||||
|
|
@ -1,383 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render GF_TILEPERSP4
|
||||
//
|
||||
// ****************************************************************************
|
||||
// data ... tile map
|
||||
// par ... column of tile images
|
||||
// par2 ... pointer to 6 matrix integer parameters m11,m12..m23 ((int)(m*FRACTMUL))
|
||||
// par3 ... LOW8=number of bits of tile width and height, HIGH8=horizon offset
|
||||
// wb ... LOW8=number of bits of tile map width, HIGH8=number of bits of tile map height
|
||||
// wrapy ... segment height
|
||||
|
||||
#include "../define.h" // common definitions of C and ASM
|
||||
#include "hardware/regs/sio.h" // registers of hardware divider
|
||||
#include "hardware/regs/addressmap.h" // SIO base address
|
||||
|
||||
#define ACCUM0_OFFSET0 0
|
||||
#define ACCUM1_OFFSET0 4
|
||||
#define BASE0_OFFSET0 8
|
||||
#define BASE1_OFFSET0 12
|
||||
#define BASE2_OFFSET0 16
|
||||
#define POP_LANE0_OFFSET0 20
|
||||
#define POP_LANE1_OFFSET0 24
|
||||
#define POP_FULL_OFFSET0 28
|
||||
#define PEEK_LANE0_OFFSET0 32
|
||||
#define PEEK_LANE1_OFFSET0 36
|
||||
#define PEEK_FULL_OFFSET0 40
|
||||
#define CTRL_LANE0_OFFSET0 44
|
||||
#define CTRL_LANE1_OFFSET0 48
|
||||
#define ACCUM0_ADD_OFFSET0 52
|
||||
#define ACCUM1_ADD_OFFSET0 56
|
||||
#define BASE_1AND0_OFFSET0 60
|
||||
|
||||
#define ACCUM0_OFFSET1 64
|
||||
#define ACCUM1_OFFSET1 68
|
||||
#define BASE0_OFFSET1 72
|
||||
#define BASE1_OFFSET1 76
|
||||
#define BASE2_OFFSET1 80
|
||||
#define POP_LANE0_OFFSET1 84
|
||||
#define POP_LANE1_OFFSET1 88
|
||||
#define POP_FULL_OFFSET1 92
|
||||
#define PEEK_LANE0_OFFSET1 96
|
||||
#define PEEK_LANE1_OFFSET1 100
|
||||
#define PEEK_FULL_OFFSET1 104
|
||||
#define CTRL_LANE0_OFFSET1 108
|
||||
#define CTRL_LANE1_OFFSET1 112
|
||||
#define ACCUM0_ADD_OFFSET1 116
|
||||
#define ACCUM1_ADD_OFFSET1 120
|
||||
#define BASE_1AND0_OFFSET1 124
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// extern "C" u32* RenderTilePersp4(u32* cbuf, int x, int y, int w, sSegm* segm);
|
||||
|
||||
// render tiles with perspective GF_TILEPERSP4, quadruple pixels
|
||||
// using hardware interpolator inter0 and inter1 (their state is not saved during interrupt)
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... start X coordinate (not used)
|
||||
// R2 ... start Y coordinate (in graphics lines)
|
||||
// R3 ... width to display (must be multiple of 4)
|
||||
// [stack] ... segm video segment sSegm
|
||||
// Output new pointer to data buffer.
|
||||
// 320 pixels takes ?? us on 151 MHz.
|
||||
|
||||
.thumb_func
|
||||
.global RenderTilePersp4
|
||||
RenderTilePersp4:
|
||||
|
||||
// Input registers and stack:
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... X coordinate (not used)
|
||||
// R2 ... Y coordinate
|
||||
// SP+0: R3 ... remaining width
|
||||
// SP+4: R4
|
||||
// SP+8: R5
|
||||
// SP+12: R6
|
||||
// SP+16: R7
|
||||
// SP+20: LR
|
||||
// SP+24: video segment
|
||||
|
||||
// push registers
|
||||
push {r3-r7,lr}
|
||||
|
||||
// ---- prepare registers
|
||||
|
||||
// get pointer to video segment -> R4
|
||||
ldr r4,[sp,#24] // load video segment -> R4
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R2 ... Y coordinate
|
||||
// R3 ... remaining width
|
||||
// R4 ... video segment
|
||||
|
||||
// load horizon offset -> R1, check if use perspective
|
||||
ldr r6,RenderTilePersp_pSioBase // get address of SIO base -> R6
|
||||
ldrh r5,[r4,#SSEGM_WRAPY] // get segment height -> R5
|
||||
ldrb r1,[r4,#SSEGM_PAR3+1] // get horizon offset -> R1
|
||||
sxtb r1,r1 // signed extension
|
||||
lsls r1,#2 // horizon * 4, horizon = 0 ?
|
||||
bne 2f // use perspective
|
||||
|
||||
// not using perspective, start Y coordinate y0 = y - h/2 -> R12
|
||||
lsrs r5,#1 // segment height/2 -> R5
|
||||
subs r2,r5 // y - h/2 -> R2
|
||||
mov r12,r2 // current coordinate Y0 = y - h/2 -> R12
|
||||
|
||||
// prepare divide result to get 1<<FRACT
|
||||
movs r5,#1 // R5 <- 1
|
||||
str r5,[r6,#SIO_DIV_UDIVISOR_OFFSET] // divisor = 1
|
||||
lsls r5,#FRACT // constant 1<<FRACT -> R5
|
||||
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // dividend = FRACTMUL
|
||||
b 4f
|
||||
|
||||
// using perspective, check ceiling mode
|
||||
2: bpl 3f // horizon is not negative
|
||||
subs r2,r5,r2 // negate, y = h - y
|
||||
subs r2,#1 // y = h - 1 - y
|
||||
negs r1,r1 // absolute value of horizon
|
||||
|
||||
// prepare current coordinate Y0 = y - h -> R12
|
||||
3: subs r7,r2,r5 // y - h = current Y coordinate -> R7
|
||||
mov r12,r7 // store current coordinate Y0 -> R12
|
||||
|
||||
// start calculating distance coefficient dist = FRACTMUL*h/(y + horiz)
|
||||
lsls r5,#FRACT // segment height * FRACTMUL -> R5
|
||||
str r5,[r6,#SIO_DIV_UDIVIDEND_OFFSET] // store dividend, FRACTMUL*h
|
||||
adds r2,r1 // horizon + y -> R2
|
||||
str r2,[r6,#SIO_DIV_UDIVISOR_OFFSET] // store divisor, y + horiz
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... remaining width
|
||||
// R4 ... video segment
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// prepare start coordinate X0 = -w/2 -> LR
|
||||
4: lsrs r5,r3,#1 // width/2
|
||||
negs r5,r5 // negate
|
||||
mov lr,r5 // store start coordinate X0 -> LR
|
||||
|
||||
// prepare number of 4-pixels (loop counter) -> R7
|
||||
lsrs r7,r3,#2 // width/4 -> R7
|
||||
|
||||
// prepare address of interpolator 0 base -> R3
|
||||
ldr r3,RenderTilePersp_Interp // get address of interpolator 0 base -> R3
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... video segment
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
|
||||
// ---- setup interpolator 0 to get tile index
|
||||
|
||||
// set tile map base to base2
|
||||
ldr r6,[r4,#SSEGM_DATA] // load tile map base
|
||||
str r6,[r3,#BASE2_OFFSET0] // set tile map base
|
||||
|
||||
// set control word of lane 0: shift=FRACT+tilebits, mask=0..mapwbits-1
|
||||
ldr r6,RenderTilePersp_Ctrl // load control word
|
||||
ldrb r1,[r4,#SSEGM_PAR3] // get tile width and height -> R1
|
||||
str r1,[sp,#0] // save tile size -> [SP+0]
|
||||
adds r6,r1 // FRACT + tilebits (SIO_INTERP0_CTRL_LANE0_SHIFT_LSB = 0, no shift required)
|
||||
ldrb r2,[r4,#SSEGM_WB] // number of bits of tile map width mapwbits -> R2
|
||||
subs r5,r2,#1 // mapwbits - 1
|
||||
lsls r5,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
|
||||
orrs r6,r5 // add to control word
|
||||
str r6,[r3,#CTRL_LANE0_OFFSET0] // set control word of lane 0
|
||||
|
||||
// set control word of lane 1: shift=FRACT+tilebits-mapwbits,
|
||||
// mask=mapwbits..mapwbits+maphbits-1
|
||||
subs r6,r2 // FRACT + tilebits - mapwbits
|
||||
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB // shift mapwbits to mask LSB position
|
||||
orrs r6,r2 // add mapwbits to control word
|
||||
ldrb r2,[r4,#SSEGM_WB+1] // number of bits of tile map height maphbits -> R2
|
||||
lsls r2,#SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB // shift maphbits to mask MSB position
|
||||
adds r6,r2 // add to control word
|
||||
str r6,[r3,#CTRL_LANE1_OFFSET0] // set control word of lane 1
|
||||
|
||||
// ---- setup interpolator 1 to get pixel index
|
||||
|
||||
// set tile image to base2
|
||||
ldr r6,[r4,#SSEGM_PAR] // load tile image base
|
||||
str r6,[r3,#BASE2_OFFSET1] // set tile image base
|
||||
|
||||
// set control word of lane 0: shift=FRACT, mask=0..tilebits-1
|
||||
ldr r6,RenderTilePersp_Ctrl // load control word
|
||||
subs r5,r1,#1 // tilebits - 1
|
||||
lsls r5,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift to mask MSB position
|
||||
orrs r6,r5 // add to control word
|
||||
str r6,[r3,#CTRL_LANE0_OFFSET1] // set control word of lane 0
|
||||
|
||||
// set control word of lane 1: shift=FRACT-tilebits, mask=tilebits..tilebits*2-1
|
||||
subs r6,r1 // FRACT - tilebits
|
||||
lsls r5,r1,#SIO_INTERP1_CTRL_LANE0_MASK_LSB_LSB // shift to mask LSB position
|
||||
orrs r6,r5 // add tilebits to control word
|
||||
lsls r1,#SIO_INTERP1_CTRL_LANE0_MASK_MSB_LSB // shift tilebits to mask MSB position
|
||||
adds r6,r1 // add to control word
|
||||
str r6,[r3,#CTRL_LANE1_OFFSET1] // set control word of lane 1
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R3 ... interpolator base
|
||||
// R4 ... video segment
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// ---- set matrix
|
||||
|
||||
// get pointer to matrix -> R4
|
||||
ldr r4,[r4,#SSEGM_PAR2] // get pointer to matrix -> R4
|
||||
|
||||
// get distance coefficient dist -> R1
|
||||
ldr r1,RenderTilePersp_pSioBase // get address of SIO base -> R1
|
||||
ldr r1,[r1,#SIO_DIV_QUOTIENT_OFFSET] // get quotient-> R1, distance coefficient
|
||||
|
||||
// r4+0 ... m11
|
||||
// r4+4 ... m12
|
||||
// r4+8 ... m13
|
||||
// r4+12 ... m21
|
||||
// r4+16 ... m22
|
||||
// r4+20 ... m23
|
||||
|
||||
// set m11 -> R5 base0
|
||||
ldr r5,[r4,#0] // load m11
|
||||
muls r5,r1 // m11*dist
|
||||
asrs r5,#FRACT-2 // (m11*dist)>>(FRACT-2) ... 4*delta
|
||||
str r5,[r3,#BASE0_OFFSET0] // set base0
|
||||
str r5,[r3,#BASE0_OFFSET1] // set base0
|
||||
asrs r5,#2 // (m11*dist)>>FRACT
|
||||
|
||||
// set m21 -> R6 base1
|
||||
ldr r6,[r4,#12] // load m21
|
||||
muls r6,r1 // m21*dist
|
||||
asrs r6,#FRACT-2 // (m21*dist)>>(FRACT-2) ... 4*delta
|
||||
str r6,[r3,#BASE1_OFFSET0] // set base1
|
||||
str r6,[r3,#BASE1_OFFSET1] // set base1
|
||||
asrs r6,#2 // (m21*dist)>>FRACT
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R1 ... distance coefficient
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R5 ... m11
|
||||
// R6 ... m21
|
||||
// R7 ... width/4
|
||||
// LR ... start coordinate X0
|
||||
// R12 ... current coordinate Y0
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// set x0*m11 + y0*m12 + m13 -> accum0
|
||||
mov r2,lr // start coordinate X0 -> R2
|
||||
muls r5,r2 // x0*m11 -> R5
|
||||
muls r2,r6 // x0*m21 -> R2
|
||||
mov lr,r1 // save distance coefficient -> LR
|
||||
ldr r6,[r4,#4] // load m12 -> R6
|
||||
muls r1,r6 // m12*dist -> R1
|
||||
asrs r1,#FRACT // (m12*dist)>>FRACT -> R1
|
||||
mov r6,r12 // load coordinate Y0 -> R6
|
||||
muls r1,r6 // y0*m12 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 -> R5
|
||||
ldr r1,[r4,#8] // load m13 -> R1
|
||||
adds r5,r1 // x0*m11 + y0*m12 + m13 -> R5
|
||||
str r5,[r3,#ACCUM0_OFFSET0] // set accum0
|
||||
str r5,[r3,#ACCUM0_OFFSET1] // set accum0
|
||||
|
||||
// R0 ... pointer to data buffer
|
||||
// R2 ... x0*m21
|
||||
// R3 ... interpolator base
|
||||
// R4 ... pointer to matrix
|
||||
// R6 ... current coordinate Y0
|
||||
// R7 ... width/4
|
||||
// LR ... distance coefficient
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// set x0*m21 + y0*m22 + m23 -> accum1
|
||||
ldr r1,[r4,#16] // load m22 -> R1
|
||||
mov r5,lr // distance coefficient -> R5
|
||||
muls r1,r5 // m22*dist
|
||||
asrs r1,#FRACT // (m22*dist)>>FRACT -> R1
|
||||
muls r1,r6 // y0*m22 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 -> R2
|
||||
ldr r1,[r4,#20] // load m23 -> R1
|
||||
adds r2,r1 // x0*m21 + y0*m22 + m23 -> R2
|
||||
str r2,[r3,#ACCUM1_OFFSET0] // set accum1
|
||||
str r2,[r3,#ACCUM1_OFFSET1] // set accum1
|
||||
|
||||
// ---- process odd 4-pixel
|
||||
|
||||
// prepare tile bits * 2
|
||||
ldr r6,[sp,#0] // get tile bits
|
||||
lsls r6,#1 // tile bits * 2
|
||||
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... (temporary - pixel accumulator 1)
|
||||
// R2 ... (temporary - pixel accumulator 2)
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to tile map, load tile index)
|
||||
// R5 ... (temporary - get pointer to pixel, load pixel)
|
||||
// R6 ... tilebits*2
|
||||
// R7 ... width/4 (loop counter)
|
||||
// [SP+0] ... number of bits of tile width and height
|
||||
|
||||
// check odd 4-pixels
|
||||
lsrs r7,#1 // width/4/2
|
||||
bcc 2f // no odd 4-pixel
|
||||
|
||||
// load pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r1,[r5,r4] // [2] load pixel
|
||||
lsls r4,r1,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
lsls r4,r1,#16 // [1] shift 2 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [2] store 4 pixels
|
||||
stmia r0!,{r1} // [2] store 4 pixels
|
||||
|
||||
// check number of remaining pixels
|
||||
2: tst r7,r7 // check number of pixels
|
||||
beq 8f // end
|
||||
|
||||
// ---- [28 per 8 pixels] inner loop
|
||||
// R0 ... pointer to destination data buffer
|
||||
// R1 ... (temporary - pixel accumulator 1)
|
||||
// R2 ... (temporary - pixel accumulator 2)
|
||||
// R3 ... interpolator base
|
||||
// R4 ... (temporary - get pointer to tile map, load tile index)
|
||||
// R5 ... (temporary - get pointer to pixel, load pixel)
|
||||
// R6 ... tilebits*2
|
||||
// R7 ... width/8 (loop counter)
|
||||
|
||||
// [11] load pixel
|
||||
6: ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r1,[r5,r4] // [2] load pixel
|
||||
lsls r4,r1,#8 // [1] shift 1 byte left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
lsls r4,r1,#16 // [1] shift 2 bytes left
|
||||
orrs r1,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [11] load pixel
|
||||
ldr r4,[r3,#POP_FULL_OFFSET0] // [1] get pointer to tile map
|
||||
ldrb r4,[r4,#0] // [2] load tile index
|
||||
lsls r4,r6 // [1] tile index * tile size
|
||||
ldr r5,[r3,#POP_FULL_OFFSET1] // [1] get pointer to tile image
|
||||
ldrb r2,[r5,r4] // [2] load pixel
|
||||
lsls r4,r2,#8 // [1] shift 1 byte left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
lsls r4,r2,#16 // [1] shift 2 bytes left
|
||||
orrs r2,r4 // [1] add pixel to accumulator
|
||||
|
||||
// [3] store 8 pixels
|
||||
stmia r0!,{r1,r2} // [3] store 8 pixels
|
||||
|
||||
// [2,3] loop counter
|
||||
subs r7,#1 // [1] 8-pixel counter
|
||||
bne 6b // [1,2] next 8-pixels
|
||||
|
||||
// pop registers
|
||||
8: pop {r3-r7,pc}
|
||||
|
||||
.align 2
|
||||
// pointer to SIO base
|
||||
RenderTilePersp_pSioBase:
|
||||
.word SIO_BASE // address of SIO base
|
||||
|
||||
// pointer to Interp0 base
|
||||
RenderTilePersp_Interp:
|
||||
.word SIO_BASE+SIO_INTERP0_ACCUM0_OFFSET // address of interpolator 0 base
|
||||
|
||||
RenderTilePersp_Ctrl: // lane control word
|
||||
.word SIO_INTERP0_CTRL_LANE0_ADD_RAW_BITS | (FRACT<<SIO_INTERP0_CTRL_LANE0_SHIFT_LSB)
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,8 +1,11 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA output
|
||||
//
|
||||
// file derived from the PicoVGA project
|
||||
// https://github.com/Panda381/PicoVGA
|
||||
// by Miroslav Nemecek
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#ifndef _VGA_H
|
||||
|
|
@ -10,124 +13,34 @@
|
|||
|
||||
// scanline type
|
||||
#define LINE_VSYNC 0 // long vertical sync
|
||||
#define LINE_VVSYNC 1 // short vertical + vertical sync
|
||||
#define LINE_VHSYNC 2 // short vertical + horizontal sync
|
||||
#define LINE_HHSYNC 3 // short horizontal + horizontal sync
|
||||
#define LINE_HVSYNC 4 // short horizontal + vertical sync
|
||||
#define LINE_DARK 5 // dark line
|
||||
#define LINE_IMG 6 // progressive image 0, 1, 2,...
|
||||
#define LINE_IMGEVEN1 7 // interlaced image even 0, 2, 4,..., 1st subframe
|
||||
#define LINE_IMGEVEN2 8 // interlaced image even 0, 2, 4,..., 2nd subframe
|
||||
#define LINE_IMGODD1 9 // interlaced image odd 1, 3, 5,..., 1st subframe
|
||||
#define LINE_IMGODD2 10 // interlaced image odd 1, 3, 5,..., 2nd subframe
|
||||
#define LINE_DARK 1 // dark line
|
||||
#define LINE_IMG 2 // progressive image 0, 1, 2,...
|
||||
|
||||
extern u8 ScanlineType[MAXLINE];
|
||||
|
||||
extern int DispDev; // current display device
|
||||
extern sVmode CurVmode; // copy of current videomode table
|
||||
//extern int LayerMode; // current layer mode (LAYERMODE_*)
|
||||
extern volatile int ScanLine; // current scan line 1...
|
||||
extern volatile u32 Frame; // frame counter
|
||||
extern volatile int BufInx; // current buffer set (0..1)
|
||||
extern volatile Bool VSync; // current scan line is vsync or dark
|
||||
|
||||
// line buffers
|
||||
extern ALIGNED u8 LineBuf1[DBUF_MAX]; // scanline 1 image data
|
||||
extern ALIGNED u8 LineBuf2[DBUF_MAX]; // scanline 2 image data
|
||||
extern int LineBufSize[LAYERS_MAX]; // size of data buffers
|
||||
extern u32 LineBufHsBp[4]; // HSYNC ... back porch-1 ... IRQ command ... image command
|
||||
extern u32 LineBufFp; // front porch+1
|
||||
extern u32 LineBufDark[2]; // HSYNC ... dark line
|
||||
extern u32 LineBufSync[10]; // vertical synchronization
|
||||
// interlaced (5x half scanlines):
|
||||
// 2x half synchronization (HSYNC pulse/2 ... line dark/2)
|
||||
// 2x vertical synchronization (invert line dark/2 ... invert HSYNC pulse)
|
||||
// 1x half synchronization (HSYNC pulse/2 ... line dark/2)
|
||||
// progressive: 1x scanline with vertical synchronization (invert line dark ... invert HSYNC pulse)
|
||||
|
||||
extern ALIGNED u8 LineBuf0[BLACK_MAX]; // line buffer with black color (used to clear rest of scanline)
|
||||
extern u32 LineBufHsBp[4]; // HSYNC ... back porch-1 ... IRQ command ... image command
|
||||
extern u32 LineBufFp; // front porch+1
|
||||
extern u32 LineBufDark[2]; // HSYNC ... dark line
|
||||
extern u32 LineBufSync[10]; // vertical synchronization
|
||||
|
||||
// control buffers
|
||||
extern u32 CtrlBuf1[CBUF_MAX]; // control pairs: u32 count, read address (must be terminated with [0,0])
|
||||
extern u32 CtrlBuf2[CBUF_MAX]; // control pairs: u32 count, read address (must be terminated with [0,0])
|
||||
|
||||
extern int CtrlBufSize[LAYERS_MAX]; // size of control buffers
|
||||
|
||||
// render font pixel mask
|
||||
extern u32 RenderTextMask[512];
|
||||
|
||||
// fill memory buffer with u32 words
|
||||
// buf ... data buffer, must be 32-bit aligned
|
||||
// data ... data word to store
|
||||
// num ... number of 32-bit words (= number of bytes/4)
|
||||
// Returns new destination address.
|
||||
extern "C" u32* MemSet4(u32* buf, u32 data, int num);
|
||||
|
||||
// blit scanline using key color
|
||||
// dst ... destination buffer
|
||||
// src ... source buffer
|
||||
// w ... width
|
||||
// key ... key color
|
||||
extern "C" void BlitKey(u8* dst, u8* src, int w, u8 key);
|
||||
|
||||
// render layers with sprites LAYERMODE_SPRITE*
|
||||
// dbuf ... pointer to data buffer
|
||||
// y ... coordinate of scanline
|
||||
// scr ... pointer to layer screen structure sLayer
|
||||
extern "C" void RenderSprite(u8* dbuf, int y, sLayer* scr);
|
||||
|
||||
// render layers with fast sprites LAYERMODE_FASTSPRITE*
|
||||
// cbuf ... pointer to control buffer
|
||||
// y ... coordinate of scanline
|
||||
// scr ... pointer to layer screen structure sLayer
|
||||
// buf ... pointer to destination data buffer with transparent color
|
||||
// Output new pointer to control buffer.
|
||||
extern "C" u32* RenderFastSprite(u32* cbuf, int y, sLayer* scr, u8* buf);
|
||||
|
||||
// render layers with transformation matrix LAYERMODE_PERSP*
|
||||
// R0 ... dbuf pointer to data buffer
|
||||
// R1 ... y coordinate of scanline (relative in destination image)
|
||||
// R2 ... scr pointer to layer screen structure sLayer
|
||||
extern "C" void RenderPersp(u8* dbuf, int y, sLayer* scr);
|
||||
|
||||
// render layers double pixel with transformation matrix LAYERMODE_PERSP2*
|
||||
// R0 ... dbuf pointer to data buffer
|
||||
// R1 ... y coordinate of scanline (relative in destination image)
|
||||
// R2 ... scr pointer to layer screen structure sLayer
|
||||
extern "C" void RenderPersp2(u8* dbuf, int y, sLayer* scr);
|
||||
|
||||
// render scanline
|
||||
// cbuf ... control buffer
|
||||
// dbuf ... data buffer (pixel data)
|
||||
// line ... current line 0..
|
||||
// pixnum ... total pixels (must be multiple of 4)
|
||||
// Returns new pointer to control buffer
|
||||
extern "C" u32* Render(u32* cbuf, u8* dbuf, int line, int pixnum);
|
||||
|
||||
// initialize scanline type table
|
||||
void ScanlineTypeInit(const sVmode* v);
|
||||
|
||||
// print table of scanline types
|
||||
void ScanlineTypePrint(const u8* scan, int lines);
|
||||
|
||||
// initialize videomode (returns False on bad configuration)
|
||||
// - All layer modes must use same layer program (LAYERMODE_BASE = overlapped layers are OFF)
|
||||
void VgaInit(const sVmode* vmode); //, u8 layer1mode=LAYERMODE_BASE, u8 layer2mode=LAYERMODE_BASE, u8 layer3mode=LAYERMODE_BASE);
|
||||
|
||||
// VGA core
|
||||
void VgaCore();
|
||||
|
||||
// request to initialize VGA videomode, NULL=only stop driver (wait to initialization completes)
|
||||
void VgaInitReq(const sVmode* vmode);
|
||||
|
||||
// execute core 1 remote function
|
||||
void Core1Exec(void (*fnc)());
|
||||
|
||||
// check if core 1 is busy (executing remote function)
|
||||
Bool Core1Busy();
|
||||
|
||||
// wait if core 1 is busy (executing remote function)
|
||||
void Core1Wait();
|
||||
void VgaInit(const sVmode* vmode, u8* buf, int width, int height, int stride); //, u8 layer1mode=LAYERMODE_BASE, u8 layer2mode=LAYERMODE_BASE, u8 layer3mode=LAYERMODE_BASE);
|
||||
|
||||
// wait for VSync scanline
|
||||
void WaitVSync();
|
||||
|
|
|
|||
|
|
@ -1,90 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA sprites
|
||||
//
|
||||
// ****************************************************************************
|
||||
// Takes 100 bytes
|
||||
|
||||
#include "define.h" // common definitions of C and ASM
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.BlitKey, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
// [6,7] blit macro (4 instructions, 8 bytes)
|
||||
.macro blitkey n
|
||||
ldrb r4,[r1,#\n] // [2] load 1 pixel
|
||||
cmp r4,r3 // [1] is it transparent color?
|
||||
beq 2f // [1,2] pixel is transparent
|
||||
strb r4,[r0,#\n] // [2] write 1 pixel
|
||||
2:
|
||||
.endm
|
||||
|
||||
// blit scanline using key color
|
||||
// dst ... destination buffer
|
||||
// src ... source buffer
|
||||
// w ... width
|
||||
// key ... key color
|
||||
//extern "C" void BlitKey(u8* dst, u8* src, int w, u8 key);
|
||||
|
||||
.thumb_func
|
||||
.global BlitKey
|
||||
BlitKey:
|
||||
|
||||
// push registers
|
||||
push {r4,lr}
|
||||
|
||||
// Registers:
|
||||
// R0 ... destination buffer
|
||||
// R1 ... source buffer
|
||||
// R2 ... width counter
|
||||
// R3 ... key color
|
||||
// R4 ... (temporary)
|
||||
|
||||
// save start of destination buffer
|
||||
mov lr,r0 // start buffer
|
||||
|
||||
// get number of pixels aligned to 8 bytes
|
||||
lsrs r4,r2,#3 // number of pixels / 8
|
||||
lsls r4,#3 // number of pixels aligned to 8 bytes down -> R4
|
||||
eors r2,r4 // number of pixels last 3 bits (modulo 8)
|
||||
|
||||
// shift pointers to last 8-byte group
|
||||
add r0,r4 // shift destination pointer to the end
|
||||
add r1,r4 // shift source pointer to the end
|
||||
|
||||
// jump to blit rest of pixels in last 8-byte group
|
||||
adr r4,3f // get address of label '3:' (must be word aligned)
|
||||
lsls r2,#3 // *8, convert number of pixels to offset of blit macro (1 macro is 8 bytes long)
|
||||
subs r4,r2 // subtract offset of first valid blit macro
|
||||
adds r4,#1 // set bit 0 - flag to use thumb instructions
|
||||
bx r4 // jump into loop
|
||||
|
||||
// ---- [53..61 per loop] blend pixels, speed 6.625..7.625 clock cycles per pixel
|
||||
|
||||
.align 2 // address of label '3:' must be word aligned (32 bits)
|
||||
|
||||
// [2] shift pointers 8 bytes down
|
||||
1: subs r0,#8 // [1] shift destination pointer by 8 bytes down
|
||||
subs r1,#8 // [1] shift source pointer by 8 bytes down
|
||||
|
||||
// [48..56] blit 8 pixels (32 instructions)
|
||||
blitkey 7 // [6,7] blit pixel 7
|
||||
blitkey 6 // [6,7] blit pixel 6
|
||||
blitkey 5 // [6,7] blit pixel 5
|
||||
blitkey 4 // [6,7] blit pixel 4
|
||||
blitkey 3 // [6,7] blit pixel 3
|
||||
blitkey 2 // [6,7] blit pixel 2
|
||||
blitkey 1 // [6,7] blit pixel 1
|
||||
blitkey 0 // [6,7] blit pixel 0
|
||||
|
||||
// this address must be word aligned
|
||||
|
||||
// [2,3] next 8 pixels
|
||||
3: cmp r0,lr // [1] start address reached?
|
||||
bhi 1b // [1,2] not start address yet
|
||||
|
||||
// pop registers and return from function
|
||||
9: pop {r4,pc}
|
||||
|
|
@ -5,59 +5,23 @@
|
|||
//
|
||||
// VGA configuration
|
||||
//
|
||||
// file derived from the PicoVGA project
|
||||
// https://github.com/Panda381/PicoVGA
|
||||
// by Miroslav Nemecek
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
// === Configuration
|
||||
#define LAYERS 1 //4 // total layers 1..4 (1 base layer + 3 overlapped layers)
|
||||
#define SEGMAX 8 // max. number of video segment per video strip (size of 1 sSegm = 28 bytes)
|
||||
#define STRIPMAX 8 // max. number of video strips (size of 1 sStrip = sSegm size*SEGMAX+4 = 228 bytes)
|
||||
// size of sScreen = sStrip size*STRIPMAX+4 = 1828 bytes
|
||||
|
||||
#define MAXX 320 //640 // max. resolution in X direction (must be multiple of 4)
|
||||
#define MAXY 240 //480 // max. resolution in Y direction
|
||||
|
||||
#define MAXLINE 700 // max. number of scanlines (including sync and dark lines)
|
||||
#define MAXLINE 525 //700 // max. number of scanlines (including sync and dark lines)
|
||||
|
||||
// === Scanline render buffers (800 pixels: default size of buffers = 2*4*(800+8+800+24)+800 = 13856 bytes
|
||||
// Requirements by format, base layer 0, 1 wrap X segment:
|
||||
// GF_GRAPH8 ... control buffer 16 bytes
|
||||
// GF_TILE8 ... control buffer "width"+8 bytes
|
||||
// GF_TILE16 ... control buffer "width/2"+8 bytes
|
||||
// GF_TILE32 ... control buffer "width/4"+8 bytes
|
||||
// GF_TILE64 ... control buffer "width/8"+8 bytes
|
||||
// GF_PROGRESS ... control buffer 24 bytes
|
||||
// other formats: data buffer "width" bytes, control buffer 16 bytes
|
||||
#define DBUF0_MAX (MAXX+8) // max. size of data buffer of layer 0
|
||||
#define CBUF0_MAX ((MAXX+24)/4) // max. size of control buffer of layer 0
|
||||
// GF_GRAPH8 ... control buffer 4*4=16 bytes
|
||||
#define CBUF_MAX 8 //((MAXX+24)/4) // max. size of control buffer of layer 0
|
||||
|
||||
// Requirements by format, overlapped layer 1..3:
|
||||
// LAYERMODE_SPRITE* ... data buffer "width"+4 bytes, control buffer 24 bytes
|
||||
// LAYERMODE_FASTSPRITE* ... data buffer "width"+4 bytes, control buffer up to "width*2"+16 bytes
|
||||
// other formats ... data buffer 4 bytes, control buffer 24 bytes
|
||||
#define DBUF1_MAX (MAXX+8) // max. size of data buffer of layer 1
|
||||
#define CBUF1_MAX ((MAXX+24)/4) // max. size of control buffer of layer 1
|
||||
|
||||
#define DBUF2_MAX (MAXX+8) // max. size of data buffer of layer 2
|
||||
#define CBUF2_MAX ((MAXX+24)/4) // max. size of control buffer of layer 2
|
||||
|
||||
#define DBUF3_MAX (MAXX+8) // max. size of data buffer of layer 3
|
||||
#define CBUF3_MAX ((MAXX+24)/4) // max. size of control buffer of layer 3
|
||||
|
||||
#if LAYERS==1
|
||||
#define DBUF_MAX DBUF0_MAX // max. size of data buffer
|
||||
#define CBUF_MAX CBUF0_MAX // max. size of control buffer
|
||||
#elif LAYERS==2
|
||||
#define DBUF_MAX (DBUF0_MAX+DBUF1_MAX) // max. size of data buffer
|
||||
#define CBUF_MAX (CBUF0_MAX+CBUF1_MAX) // max. size of control buffer
|
||||
#elif LAYERS==3
|
||||
#define DBUF_MAX (DBUF0_MAX+DBUF1_MAX+DBUF2_MAX) // max. size of data buffer
|
||||
#define CBUF_MAX (CBUF0_MAX+CBUF1_MAX+CBUF2_MAX) // max. size of control buffer
|
||||
#elif LAYERS==4
|
||||
#define DBUF_MAX (DBUF0_MAX+DBUF1_MAX+DBUF2_MAX+DBUF3_MAX) // max. size of data buffer
|
||||
#define CBUF_MAX (CBUF0_MAX+CBUF1_MAX+CBUF2_MAX+CBUF3_MAX) // max. size of control buffer
|
||||
#else
|
||||
#error Unsupported number of layers!
|
||||
#endif
|
||||
|
||||
// === VGA port pins
|
||||
// GP0 ... VGA B0 blue
|
||||
|
|
@ -75,39 +39,21 @@
|
|||
#define VGA_GPIO_LAST (VGA_GPIO_FIRST+VGA_GPIO_NUM-1) // last VGA GPIO
|
||||
#define VGA_GPIO_SYNC VGA_SYNCBASE // VGA SYNC GPIO
|
||||
|
||||
// === VGA PIO program
|
||||
#define BASE_OFFSET 17 // offset of base layer program
|
||||
|
||||
// VGA PIO and state machines
|
||||
#define VGA_PIO pio0 // VGA PIO
|
||||
#define VGA_SM0 0 // VGA state machine of base layer 0
|
||||
#define VGA_SM1 1 // VGA state machine of overlapped layer 1
|
||||
#define VGA_SM2 2 // VGA state machine of overlapped layer 2
|
||||
#define VGA_SM3 3 // VGA state machine of overlapped layer 3
|
||||
#define VGA_SM(layer) (VGA_SM0+(layer)) // VGA state machine of the layer
|
||||
|
||||
#if LAYERS==1
|
||||
// LAYERS==1
|
||||
#define VGA_SMALL B0 // mask of all state machines
|
||||
#elif LAYERS==2
|
||||
#define VGA_SMALL (B0+B1) // mask of all state machines
|
||||
#elif LAYERS==3
|
||||
#define VGA_SMALL (B0+B1+B2) // mask of all state machines
|
||||
#elif LAYERS==4
|
||||
#define VGA_SMALL (B0+B1+B2+B3) // mask of all state machines
|
||||
#else
|
||||
#error Unsupported number of layers!
|
||||
#endif
|
||||
|
||||
|
||||
// VGA DMA
|
||||
#define VGA_DMA 2 // VGA DMA base channel
|
||||
#define VGA_DMA_CB0 (VGA_DMA+0) // VGA DMA channel - control block of base layer
|
||||
#define VGA_DMA_PIO0 (VGA_DMA+1) // VGA DMA channel - copy data of base layer to PIO (raises IRQ0 on quiet)
|
||||
#define VGA_DMA_CB1 (VGA_DMA+2) // VGA DMA channel - control block of overlapped layer 1
|
||||
#define VGA_DMA_PIO1 (VGA_DMA+3) // VGA DMA channel - copy data of overlapped layer 1 to PIO
|
||||
#define VGA_DMA_CB2 (VGA_DMA+4) // VGA DMA channel - control block of overlapped layer 2
|
||||
#define VGA_DMA_PIO2 (VGA_DMA+5) // VGA DMA channel - copy data of overlapped layer 2 to PIO
|
||||
#define VGA_DMA_CB3 (VGA_DMA+6) // VGA DMA channel - control block of overlapped layer 3
|
||||
#define VGA_DMA_PIO3 (VGA_DMA+7) // VGA DMA channel - copy data of overlapped layer 3 to PIO
|
||||
|
||||
#define VGA_DMA_CB(layer) (VGA_DMA_CB0+(layer)*2) // VGA DMA control channel of the layer
|
||||
#define VGA_DMA_PIO(layer) (VGA_DMA_PIO0+(layer)*2) // VGA DMA data channel of the layer
|
||||
|
||||
#define VGA_DMA_NUM (LAYERS*2) // number of used DMA channels
|
||||
#define VGA_DMA_FIRST VGA_DMA // first used DMA
|
||||
|
|
|
|||
|
|
@ -1,505 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA layers
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "include.h"
|
||||
|
||||
// layer program descriptors
|
||||
const sLayerProg LayerProg[LAYERPROG_NUM] = {
|
||||
|
||||
// LAYERPROG_BASE base layer
|
||||
{
|
||||
.ins=vga_program_instructions, // pointer to program instructions
|
||||
.prg=&vga_program, // pointer to program descriptor
|
||||
.length=vga_program.length, // program length (number of instructions)
|
||||
.wrap_target=vga_wrap_target, // offset of wrap target
|
||||
.wrap=vga_wrap, // offset of wrap end
|
||||
.idle=vga_offset_entry, // offset of idle
|
||||
.entry=vga_offset_entry, // offset of entry
|
||||
.maxidle=2, // max. offset of idle to detect end of job
|
||||
.extranum=2, // number of extra offsets
|
||||
.extra={ // extra offsets, pairs: offset, CPP-correction
|
||||
vga_offset_extra1, 2,
|
||||
vga_offset_extra2, 2,
|
||||
},
|
||||
},
|
||||
|
||||
// LAYERPROG_KEY layer with key color
|
||||
{
|
||||
.ins=keylayer_program_instructions, // pointer to program instructions
|
||||
.prg=&keylayer_program, // pointer to program descriptor
|
||||
.length=keylayer_program.length, // program length (number of instructions)
|
||||
.wrap_target=keylayer_wrap_target, // offset of wrap target
|
||||
.wrap=keylayer_wrap, // offset of wrap end
|
||||
.idle=keylayer_offset_idle, // offset of idle
|
||||
.entry=keylayer_offset_entry, // offset of entry
|
||||
.maxidle=2, // max. offset of idle to detect end of job
|
||||
.extranum=1, // number of extra offsets
|
||||
.extra={ // extra offsets, pairs: offset, CPP-correction
|
||||
keylayer_offset_extra1, 6,
|
||||
},
|
||||
},
|
||||
|
||||
// LAYERPROG_BLACK layer with black key color
|
||||
{
|
||||
.ins=blacklayer_program_instructions, // pointer to program instructions
|
||||
.prg=&blacklayer_program, // pointer to program descriptor
|
||||
.length=blacklayer_program.length, // program length (number of instructions)
|
||||
.wrap_target=blacklayer_wrap_target, // offset of wrap target
|
||||
.wrap=blacklayer_wrap, // offset of wrap end
|
||||
.idle=blacklayer_offset_idle, // offset of idle
|
||||
.entry=blacklayer_offset_entry, // offset of entry
|
||||
.maxidle=2, // max. offset of idle to detect end of job
|
||||
.extranum=2, // number of extra offsets
|
||||
.extra={ // extra offsets, pairs: offset, CPP-correction
|
||||
blacklayer_offset_extra1, 4,
|
||||
blacklayer_offset_extra2, 3,
|
||||
},
|
||||
},
|
||||
|
||||
// LAYERPROG_WHITE layer with white key color
|
||||
{
|
||||
.ins=whitelayer_program_instructions, // pointer to program instructions
|
||||
.prg=&whitelayer_program, // pointer to program descriptor
|
||||
.length=whitelayer_program.length, // program length (number of instructions)
|
||||
.wrap_target=whitelayer_wrap_target, // offset of wrap target
|
||||
.wrap=whitelayer_wrap, // offset of wrap end
|
||||
.idle=whitelayer_offset_idle, // offset of idle
|
||||
.entry=whitelayer_offset_entry, // offset of entry
|
||||
.maxidle=2, // max. offset of idle to detect end of job
|
||||
.extranum=1, // number of extra offsets
|
||||
.extra={ // extra offsets, pairs: offset, CPP-correction
|
||||
whitelayer_offset_extra1, 4,
|
||||
},
|
||||
},
|
||||
|
||||
// LAYERPROG_MONO layer with mono pattern or simple color
|
||||
{
|
||||
.ins=monolayer_program_instructions, // pointer to program instructions
|
||||
.prg=&monolayer_program, // pointer to program descriptor
|
||||
.length=monolayer_program.length, // program length (number of instructions)
|
||||
.wrap_target=monolayer_wrap_target, // offset of wrap target
|
||||
.wrap=monolayer_wrap, // offset of wrap end
|
||||
.idle=monolayer_offset_idle, // offset of idle
|
||||
.entry=monolayer_offset_entry, // offset of entry
|
||||
.maxidle=2, // max. offset of idle to detect end of job
|
||||
.extranum=2, // number of extra offsets
|
||||
.extra={ // extra offsets, pairs: offset, CPP-correction
|
||||
monolayer_offset_extra1, 4,
|
||||
monolayer_offset_extra2, 2,
|
||||
},
|
||||
},
|
||||
|
||||
// LAYERPROG_RLE layer with RLE compression
|
||||
{
|
||||
.ins=rlelayer_program_instructions, // pointer to program instructions
|
||||
.prg=&rlelayer_program, // pointer to program descriptor
|
||||
.length=rlelayer_program.length, // program length (number of instructions)
|
||||
.wrap_target=rlelayer_wrap_target, // offset of wrap target
|
||||
.wrap=rlelayer_wrap, // offset of wrap end
|
||||
.idle=rlelayer_offset_idle, // offset of idle
|
||||
.entry=rlelayer_offset_entry, // offset of entry
|
||||
.maxidle=2, // max. offset of idle to detect end of job
|
||||
.extranum=7, // number of extra offsets
|
||||
.extra={ // extra offsets, pairs: offset, CPP-correction
|
||||
rlelayer_offset_extra1, 1,
|
||||
rlelayer_offset_extra2, 3,
|
||||
rlelayer_offset_extra3, 2,
|
||||
rlelayer_offset_extra4, 2,
|
||||
rlelayer_offset_extra5, 3,
|
||||
rlelayer_offset_extra6, 2,
|
||||
rlelayer_offset_extra7, 3,
|
||||
},
|
||||
},
|
||||
|
||||
};
|
||||
|
||||
// current layer program of overlapped layers
|
||||
u8 LayerProgInx; // index of current layer program (LAYERPROG_*)
|
||||
sLayerProg CurLayerProg; // copy of current layer program
|
||||
|
||||
// layer mode descriptors
|
||||
const sLayerMode LayerMode[LAYERMODE_NUM] = {
|
||||
|
||||
// LAYERMODE_BASE base layer
|
||||
{
|
||||
.prog=LAYERPROG_BASE, // layer program (LAYERPROG_*)
|
||||
.mincpp=2, // minimal clock cycles per pixel
|
||||
.maxcpp=17, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_KEY layers with key color
|
||||
{
|
||||
.prog=LAYERPROG_KEY, // layer program (LAYERPROG_*)
|
||||
.mincpp=6, // minimal clock cycles per pixel
|
||||
.maxcpp=37, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_BLACK layers with black key color
|
||||
{
|
||||
.prog=LAYERPROG_BLACK, // layer program (LAYERPROG_*)
|
||||
.mincpp=4, // minimal clock cycles per pixel
|
||||
.maxcpp=34, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_WHITE layers with white key color
|
||||
{
|
||||
.prog=LAYERPROG_WHITE, // layer program (LAYERPROG_*)
|
||||
.mincpp=4, // minimal clock cycles per pixel
|
||||
.maxcpp=35, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_MONO layers with mono pattern
|
||||
{
|
||||
.prog=LAYERPROG_MONO, // layer program (LAYERPROG_*)
|
||||
.mincpp=4, // minimal clock cycles per pixel
|
||||
.maxcpp=35, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_COLOR layers with simple color
|
||||
{
|
||||
.prog=LAYERPROG_MONO, // layer program (LAYERPROG_*)
|
||||
.mincpp=2, // minimal clock cycles per pixel
|
||||
.maxcpp=33, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_RLE layers with RLE compression
|
||||
{
|
||||
.prog=LAYERPROG_RLE, // layer program (LAYERPROG_*)
|
||||
.mincpp=3, // minimal clock cycles per pixel
|
||||
.maxcpp=32, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_SPRITEKEY layers with sprites with key color
|
||||
{
|
||||
.prog=LAYERPROG_KEY, // layer program (LAYERPROG_*)
|
||||
.mincpp=6, // minimal clock cycles per pixel
|
||||
.maxcpp=37, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_SPRITEBLACK layers with sprites with black key color
|
||||
{
|
||||
.prog=LAYERPROG_BLACK, // layer program (LAYERPROG_*)
|
||||
.mincpp=4, // minimal clock cycles per pixel
|
||||
.maxcpp=34, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_SPRITEWHITE layers with sprites with white key color
|
||||
{
|
||||
.prog=LAYERPROG_WHITE, // layer program (LAYERPROG_*)
|
||||
.mincpp=4, // minimal clock cycles per pixel
|
||||
.maxcpp=35, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_FASTSPRITEKEY layers with fast sprites with key color
|
||||
{
|
||||
.prog=LAYERPROG_KEY, // layer program (LAYERPROG_*)
|
||||
.mincpp=6, // minimal clock cycles per pixel
|
||||
.maxcpp=37, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_FASTSPRITEBLACK layers with fast sprites with black key color
|
||||
{
|
||||
.prog=LAYERPROG_BLACK, // layer program (LAYERPROG_*)
|
||||
.mincpp=4, // minimal clock cycles per pixel
|
||||
.maxcpp=34, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_FASTSPRITEWHITE layers with fast sprites with white key color
|
||||
{
|
||||
.prog=LAYERPROG_WHITE, // layer program (LAYERPROG_*)
|
||||
.mincpp=4, // minimal clock cycles per pixel
|
||||
.maxcpp=35, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_PERSPKEY layer with key color and image with transformation matrix
|
||||
{
|
||||
.prog=LAYERPROG_KEY, // layer program (LAYERPROG_*)
|
||||
.mincpp=6, // minimal clock cycles per pixel
|
||||
.maxcpp=37, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_PERSPBLACK layer with black key color and image with transformation matrix
|
||||
{
|
||||
.prog=LAYERPROG_BLACK, // layer program (LAYERPROG_*)
|
||||
.mincpp=4, // minimal clock cycles per pixel
|
||||
.maxcpp=34, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_PERSPWHITE layer with white key color and image with transformation matrix
|
||||
{
|
||||
.prog=LAYERPROG_WHITE, // layer program (LAYERPROG_*)
|
||||
.mincpp=4, // minimal clock cycles per pixel
|
||||
.maxcpp=35, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_PERSP2KEY layer with key color and double pixel image with transformation matrix
|
||||
{
|
||||
.prog=LAYERPROG_KEY, // layer program (LAYERPROG_*)
|
||||
.mincpp=6, // minimal clock cycles per pixel
|
||||
.maxcpp=37, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_PERSP2BLACK layer with black key color and double pixel image with transformation matrix
|
||||
{
|
||||
.prog=LAYERPROG_BLACK, // layer program (LAYERPROG_*)
|
||||
.mincpp=4, // minimal clock cycles per pixel
|
||||
.maxcpp=34, // maximal clock cycles per pixel
|
||||
},
|
||||
|
||||
// LAYERMODE_PERSP2WHITE layer with white key color and double pixel image with transformation matrix
|
||||
{
|
||||
.prog=LAYERPROG_WHITE, // layer program (LAYERPROG_*)
|
||||
.mincpp=4, // minimal clock cycles per pixel
|
||||
.maxcpp=35, // maximal clock cycles per pixel
|
||||
},
|
||||
};
|
||||
|
||||
// current layer mode of layers
|
||||
u8 LayerModeInx[LAYERS]; // index of current layer mode (LAYERMODE_*)
|
||||
sLayerMode CurLayerMode[LAYERS]; // copy of current layer mode
|
||||
|
||||
// current layer screens
|
||||
sLayer LayerScreen[LAYERS]; // layer screens
|
||||
|
||||
u8 LayerMask; // mask of active layers
|
||||
|
||||
// index of first pin of layer (base layer should stay VGA_GPIO_FIRST)
|
||||
u8 LayerFirstPin[LAYERS_MAX] = { VGA_GPIO_FIRST, VGA_GPIO_FIRST, VGA_GPIO_FIRST, VGA_GPIO_FIRST};
|
||||
|
||||
// number of pins of overlapped layer (base layer should stay VGA_GPIO_OUTNUM)
|
||||
u8 LayerNumPin[LAYERS_MAX] = { VGA_GPIO_OUTNUM, VGA_GPIO_OUTNUM, VGA_GPIO_OUTNUM, VGA_GPIO_OUTNUM};
|
||||
|
||||
// set overlapped layer 1..3 ON
|
||||
void LayerOn(u8 inx)
|
||||
{
|
||||
__dmb();
|
||||
LayerScreen[inx].on = True;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set overlapped layer 1..3 OFF
|
||||
void LayerOff(u8 inx)
|
||||
{
|
||||
__dmb();
|
||||
LayerScreen[inx].on = False;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set coordinate X of overlapped layer
|
||||
void LayerSetX(u8 inx, s16 x)
|
||||
{
|
||||
sLayer* lay = &LayerScreen[inx];
|
||||
s32 cppx = lay->cpp*x; // initial delay
|
||||
if (cppx < 0) cppx = 0;
|
||||
u32 w = lay->w; // image width
|
||||
u32 init = 0; // init word
|
||||
|
||||
// prepare init word
|
||||
switch (lay->mode)
|
||||
{
|
||||
case LAYERMODE_PERSP2KEY: // layer with key color and double pixel image with transformation matrix
|
||||
case LAYERMODE_PERSPKEY: // layer with key color and image with transformation matrix
|
||||
case LAYERMODE_FASTSPRITEKEY: // layer with fast sprites with key color
|
||||
case LAYERMODE_SPRITEKEY: // layer with sprites with key color
|
||||
case LAYERMODE_KEY: // layer with key color
|
||||
init = VGAKEY(cppx, w, (lay->keycol & 0xff));
|
||||
break;
|
||||
|
||||
case LAYERMODE_PERSP2BLACK: // layer with black key color and double pixel image with transformation matrix
|
||||
case LAYERMODE_PERSPBLACK: // layer with black key color and image with transformation matrix
|
||||
case LAYERMODE_FASTSPRITEBLACK: // layer with fast sprites with black key color
|
||||
case LAYERMODE_SPRITEBLACK: // layer with sprites with black key color
|
||||
case LAYERMODE_BLACK: // layer with black key color
|
||||
init = VGABLACK(cppx, w);
|
||||
break;
|
||||
|
||||
case LAYERMODE_PERSP2WHITE: // layer with white key color and double pixel image with transformation matrix
|
||||
case LAYERMODE_PERSPWHITE: // layer with white key color and image with transformation matrix
|
||||
case LAYERMODE_FASTSPRITEWHITE: // layer with fast sprites with white key color
|
||||
case LAYERMODE_SPRITEWHITE: // layer with sprites with white key color
|
||||
case LAYERMODE_WHITE: // layer with white key color
|
||||
init = VGAWHITE(cppx, w);
|
||||
break;
|
||||
|
||||
case LAYERMODE_MONO: // layer with mono pattern
|
||||
init = VGAMONO(cppx, w, (lay->keycol & 0xff));
|
||||
break;
|
||||
|
||||
case LAYERMODE_COLOR: // layer with simple color
|
||||
init = VGACOLOR(cppx, w);
|
||||
break;
|
||||
|
||||
case LAYERMODE_RLE: // layer with RLE compression
|
||||
init = VGARLE(cppx);
|
||||
break;
|
||||
}
|
||||
lay->init = init; // init word
|
||||
lay->x = x; // start X coordinate
|
||||
}
|
||||
|
||||
// set coordinate Y of overlapped layer
|
||||
void LayerSetY(u8 inx, s16 y)
|
||||
{
|
||||
sLayer* lay = &LayerScreen[inx];
|
||||
lay->y = y;
|
||||
}
|
||||
|
||||
// set width of image of overlapped layer
|
||||
// Uses auto pitch wb (full line). Set custom wb after calling this function.
|
||||
void LayerSetW(u8 inx, u16 w)
|
||||
{
|
||||
sLayer* lay = &LayerScreen[inx];
|
||||
lay->w = w; // image width
|
||||
Bool mono = (lay->mode == LAYERMODE_MONO);
|
||||
lay->trans = mono ? (((w/8)+3)/4) : (w/4); // transfer count
|
||||
lay->wb = mono ? (w/8) : w; // width bytes
|
||||
LayerSetX(inx, lay->x); // update init word
|
||||
}
|
||||
|
||||
// set height of image of overlapped layer
|
||||
void LayerSetH(u8 inx, u16 h)
|
||||
{
|
||||
sLayer* lay = &LayerScreen[inx];
|
||||
lay->h = h;
|
||||
}
|
||||
|
||||
// setup overlapped layer 1..3 (not for sprites and not for perspective mode)
|
||||
// inx ... layer index 1..3
|
||||
// img ... pointer to image data
|
||||
// vmode ... pointer to initialized video configuration
|
||||
// w ... image width in pixels (must be multiple of 4)
|
||||
// h ... image height
|
||||
// col ... key color (needed for LAYERMODE_KEY and LAYERMODE_MONO layer mode)
|
||||
// par ... additional data (RLE index table, integer transformation matrix)
|
||||
// Use these functions after layer setup: LayerSetX, LayerSetY, LayerOn
|
||||
void LayerSetup(u8 inx, const u8* img, const sVmode* vmode, u16 w, u16 h, u8 col /* = 0 */, const void* par /* = NULL */)
|
||||
{
|
||||
LayerOff(inx); // set layer OFF
|
||||
sLayer* lay = &LayerScreen[inx]; // get pointer to layer
|
||||
lay->img = img; // pointer to image data
|
||||
lay->par = par; // additional parameter
|
||||
lay->keycol = col | ((u16)col << 8) | ((u32)col << 16) | ((u32)col << 24); // key color
|
||||
lay->x = 0; // X coordinate
|
||||
lay->y = 0; // Y coordinate
|
||||
lay->h = h; // height of image
|
||||
lay->spritenum = 0; // number of sprites
|
||||
lay->cpp = vmode->cpp; // save clocks per pixel
|
||||
lay->mode = vmode->mode[inx]; // layer mode
|
||||
LayerSetW(inx, w); // set width of image, update parameters init, trans and wb
|
||||
}
|
||||
|
||||
// setup overlapped layer 1..3 for LAYERMODE_PERSP* modes
|
||||
// inx ... layer index 1..3
|
||||
// img ... pointer to source image data (image width and height must be power of 2)
|
||||
// vmode ... pointer to initialized video configuration
|
||||
// w ... destination image width in pixels (must be multiple of 4)
|
||||
// h ... destination image height
|
||||
// xbits ... number of bits of width of source image
|
||||
// ybits ... number of bits of height of source image
|
||||
// horiz ... horizon of perspective projection/4 (0=no perspecitve, <0 ceilling)
|
||||
// mat ... integer transformation matrix
|
||||
// col ... key color (needed for LAYERMODE_PERSPKEY layer mode)
|
||||
// Use these functions after layer setup: LayerSetX, LayerSetY, LayerOn
|
||||
void LayerPerspSetup(u8 inx, const u8* img, const sVmode* vmode, u16 w, u16 h, u8 xbits, u8 ybits,
|
||||
s8 horiz, const int* mat, u8 col /* = 0 */)
|
||||
{
|
||||
LayerSetup(inx, img, vmode, w, h, col, mat);
|
||||
sLayer* lay = &LayerScreen[inx]; // get pointer to layer
|
||||
lay->xbits = xbits;
|
||||
lay->ybits = ybits;
|
||||
lay->horiz = horiz;
|
||||
}
|
||||
|
||||
// setup overlapped layer 1..3 for LAYERMODE_SPRITE* and LAYERMODE_FASTSPRITE* modes
|
||||
// inx ... layer index 1..3
|
||||
// sprite ... pointer to list of sprites (array of pointers to sprites; sorted by X on LAYERMODE_FASTSPRITE* modes)
|
||||
// spritenum ... number of sprites in the list (to turn sprite off, you can set its coordinate Y out of the screen)
|
||||
// vmode ... pointer to initialized video configuration
|
||||
// x ... start coordinate X of area with sprites
|
||||
// y ... start coordinate Y of area with sprites
|
||||
// w ... width of area with sprites (must be multiple of 4)
|
||||
// h ... height of area with sprites
|
||||
// col ... key color (needed for LAYERMODE_SPRITEKEY and LAYERMODE_FASTSPRITEKEY layer mode)
|
||||
// Use functions LayerOn after layer setup.
|
||||
void LayerSpriteSetup(u8 inx, sSprite** sprite, u16 spritenum, const sVmode* vmode,
|
||||
s16 x, s16 y, u16 w, u16 h, u8 col /* = 0 */)
|
||||
{
|
||||
LayerSetup(inx, (const u8*)sprite, vmode, w, h, col);
|
||||
LayerSetX(inx, x);
|
||||
LayerSetY(inx, y);
|
||||
sLayer* lay = &LayerScreen[inx]; // get pointer to layer
|
||||
lay->spritenum = spritenum;
|
||||
}
|
||||
|
||||
// prepare array of start and length of lines (detects transparent pixels)
|
||||
// img ... image
|
||||
// x0 ... array of start of lines
|
||||
// w0 ... array of length of lines
|
||||
// w ... sprite width (slow sprite: max. width 255)
|
||||
// h ... sprite height
|
||||
// wb ... sprite pitch (bytes between lines)
|
||||
// col ... key color
|
||||
// fast ... fast sprite, divide start and length of line by 4
|
||||
void SpritePrepLines(const u8* img, u8* x0, u8* w0, u16 w, u16 h, u16 wb, u8 col, Bool fast)
|
||||
{
|
||||
int x1, x2, w2, y;
|
||||
const u8* d;
|
||||
|
||||
// loop through lines
|
||||
for (y = 0; y < h; y++)
|
||||
{
|
||||
// find start of line
|
||||
d = &img[y*wb];
|
||||
for (x1 = 0; x1 < w; x1++)
|
||||
{
|
||||
if (*d != col) break;
|
||||
d++;
|
||||
}
|
||||
|
||||
// find end of line
|
||||
d = &img[y*wb + w - 1];
|
||||
for (x2 = w; x2 > x1; x2--)
|
||||
{
|
||||
if (*d != col) break;
|
||||
d--;
|
||||
}
|
||||
|
||||
// prepare start and length
|
||||
w2 = x2 - x1;
|
||||
if (fast)
|
||||
{
|
||||
w2 += ((x2 + 3) & ~3) - x2;
|
||||
x1 /= 4;
|
||||
w2 = (w2 + 3)/4;
|
||||
}
|
||||
if (x1 > 255) x1 = 255;
|
||||
if (w2 > 255) w2 = 255;
|
||||
|
||||
// store start and length
|
||||
*x0++ = x1;
|
||||
*w0++ = w2;
|
||||
}
|
||||
}
|
||||
|
||||
// sort fast sprite list by X coordinate
|
||||
void SortSprite(sSprite** list, int num)
|
||||
{
|
||||
int i;
|
||||
sSprite* s;
|
||||
sSprite* s2;
|
||||
for (i = 0; i < num-1; i++)
|
||||
{
|
||||
s = list[i];
|
||||
s2 = list[i+1];
|
||||
if (s->x > s2->x)
|
||||
{
|
||||
list[i] = s2;
|
||||
list[i+1] = s;
|
||||
if (i > 0) i -= 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,195 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA layers
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#ifndef _VGA_LAYER_H
|
||||
#define _VGA_LAYER_H
|
||||
|
||||
// --- base layer commands

// control word of "dark" command: (vga_offset_dark+BASE_OFFSET)<<27 | num<<8 | col
#define VGADARK(num,col) \
	( ((u32)(vga_offset_dark+BASE_OFFSET)<<27) | ((u32)(num)<<8) | (u32)(col) )

// generic control word: jmp<<27 | num
#define VGACMD(jmp,num) \
	( ((u32)(jmp)<<27) | (u32)(num) )

// --- overlapped layer init words
// delay: use number of offset pixels * Vmode.cpp, num: number of pixels

// LAYERPROG_KEY, layer with key color
#define VGAKEY(delay,num,col) \
	( ((u32)((delay)+1)<<19) | ((u32)(col)<<11) | (u32)((num)-1) )

// LAYERPROG_MONO, mono pattern (bit 0 set)
#define VGAMONO(delay,num,col) \
	( ((u32)((delay)+0)<<20) | ((u32)(col)<<12) | ((u32)((num)-1)<<1) | B0 )

// LAYERPROG_MONO, simple color (bit 0 clear)
#define VGACOLOR(delay,num) \
	( ((u32)((delay)+2)<<20) | ((u32)0xff<<12) | ((u32)((num)-1)<<1) | 0 )

// LAYERPROG_BLACK, layer with black key color
#define VGABLACK(delay,num) \
	( ((u32)((delay)+3)<<16) | (u32)((num)-1) )

// LAYERPROG_WHITE, layer with white key color
#define VGAWHITE(delay,num) \
	( ((u32)((delay)+3)<<16) | (u32)((num)-1) )

// LAYERPROG_RLE, layer with RLE compression
#define VGARLE(delay) \
	( (delay)+1 )
|
||||
|
||||
// Swap the byte order of a 32-bit command word.
// The argument is converted to u32 before masking and shifting, so moving the
// lowest byte to bits 24..31 never shifts a signed int into its sign bit and
// the result always has type u32. The argument is evaluated 4 times.
#define BYTESWAP(n) ( (((u32)(n) & 0xffu) << 24) | (((u32)(n) & 0xff00u) << 8) | (((u32)(n) & 0xff0000u) >> 8) | (((u32)(n) & 0xff000000u) >> 24) )
|
||||
|
||||
// align to multiple of 4
|
||||
#define ALIGN4(x) ((x) & ~3) // clears the two lowest bits, i.e. rounds down to a multiple of 4
|
||||
|
||||
// layer program descriptor
|
||||
typedef struct {
|
||||
const u16* ins; // pointer to program instructions (NULL=layers is OFF)
|
||||
const struct pio_program* prg; // pointer to program descriptor
|
||||
u8 length; // program length (number of instructions)
|
||||
u8 wrap_target; // offset of wrap target
|
||||
u8 wrap; // offset of wrap end
|
||||
u8 idle; // offset of idle
|
||||
u8 entry; // offset of entry
|
||||
u8 maxidle; // max. offset of idle to detect end of job
|
||||
u8 extranum; // number of extra offsets
|
||||
u8 extra[2*16]; // extra offsets, pairs: offset, CPP-correction
|
||||
} sLayerProg;
|
||||
|
||||
// layer program descriptors
|
||||
extern const sLayerProg LayerProg[LAYERPROG_NUM];
|
||||
|
||||
// current layer program of overlapped layers
|
||||
extern u8 LayerProgInx; // index of current layer program (LAYERPROG_*, LAYERPROG_BASE = overlapped layers are OFF)
|
||||
extern sLayerProg CurLayerProg; // copy of current layer program
|
||||
|
||||
// layer mode descriptor
|
||||
typedef struct {
|
||||
u8 prog; // layer program (LAYERPROG_*)
|
||||
u8 mincpp; // minimal clock cycles per pixel
|
||||
u8 maxcpp; // maximal clock cycles per pixel
|
||||
} sLayerMode;
|
||||
|
||||
// layer mode descriptors
|
||||
extern const sLayerMode LayerMode[LAYERMODE_NUM];
|
||||
|
||||
// current layer mode of layers
|
||||
extern u8 LayerModeInx[LAYERS]; // index of current layer mode (LAYERMODE_*)
|
||||
extern sLayerMode CurLayerMode[LAYERS]; // copy of current layer mode
|
||||
|
||||
// layer screen descriptor (on change update SLAYER_* in define.h)
|
||||
typedef struct {
|
||||
const u8* img; // pointer to image in current layer format, or sprite list
|
||||
const void* par; // additional parameter (RLE index table, integer transformation matrix)
|
||||
u32 init; // init word sent on start of scanline (start X coordinate)
|
||||
u32 keycol; // key color
|
||||
u16 trans; // trans count
|
||||
s16 x; // start X coordinate
|
||||
s16 y; // start Y coordinate
|
||||
u16 w; // width in pixels
|
||||
u16 h; // height
|
||||
u16 wb; // image width in bytes (pitch of lines)
|
||||
u8 mode; // layer mode
|
||||
s8 horiz; // horizon of perspective projection/4 (only with LAYERMODE_PERSP* modes, 0=no perspecitve, <0 ceilling)
|
||||
u8 xbits; // number of bits of width of source image (only with LAYERMODE_PERSP* modes)
|
||||
u8 ybits; // number of bits of height of source image (only with LAYERMODE_PERSP* modes)
|
||||
u16 spritenum; // number of sprites
|
||||
Bool on; // layer is ON
|
||||
u8 cpp; // current clock pulses per pixel (used to calculate X coordinate)
|
||||
} sLayer;
|
||||
|
||||
// sprite (on change update SSPRITE_* in define.h)
|
||||
typedef struct {
|
||||
u8* img; // SSPRITE_IMG pointer to image data
|
||||
u8* x0; // SSPRITE_X0 pointer to array of start of lines, or fast sprite start of lines/4
|
||||
u8* w0; // SSPRITE_W0 pointer to array of length of lines, or fast sprite length of lines/4
|
||||
u32 keycol; // SSPRITE_KEYCOL key color
|
||||
s16 x; // SSPRITE_X sprite X-coordinate on the screen
|
||||
s16 y; // SSPRITE_Y sprite Y-coordinate on the screen
|
||||
u16 w; // SSPRITE_W sprite width (slow sprite: max. width 255)
|
||||
u16 h; // SSPRITE_H sprite height
|
||||
u16 wb; // SSPRITE_WB sprite pitch (number of bytes between lines)
|
||||
u16 res; // ...reserved, structure align
|
||||
} sSprite;
|
||||
|
||||
// current layer screens
|
||||
extern sLayer LayerScreen[LAYERS]; // layer screens
|
||||
|
||||
extern u8 LayerMask; // mask of active layers
|
||||
|
||||
// index of first pin of layer (base layer should stay VGA_GPIO_FIRST)
|
||||
extern u8 LayerFirstPin[LAYERS_MAX];
|
||||
|
||||
// number of pins of overlapped layer (base layer should stay VGA_GPIO_OUTNUM)
|
||||
extern u8 LayerNumPin[LAYERS_MAX];
|
||||
|
||||
// set overlapped layer 1..3 ON
|
||||
void LayerOn(u8 inx);
|
||||
|
||||
// set overlapped layer 1..3 OFF
|
||||
void LayerOff(u8 inx);
|
||||
|
||||
// set coordinate X of overlapped layer
|
||||
void LayerSetX(u8 inx, s16 x);
|
||||
|
||||
// set coordinate Y of overlapped layer
|
||||
void LayerSetY(u8 inx, s16 y);
|
||||
|
||||
// set width of image of overlapped layer
|
||||
// Uses auto pitch wb (full line). Set custom wb after calling this function.
|
||||
void LayerSetW(u8 inx, u16 w);
|
||||
|
||||
// set height of image of overlapped layer
|
||||
void LayerSetH(u8 inx, u16 h);
|
||||
|
||||
// setup overlapped layer 1..3 (not for sprites and not for perspective mode)
|
||||
// inx ... layer index 1..3
|
||||
// img ... pointer to image data
|
||||
// vmode ... pointer to initialized video configuration
|
||||
// w ... image width in pixels (must be multiple of 4)
|
||||
// h ... image height
|
||||
// col ... key color (needed for LAYERMODE_KEY and LAYERMODE_MONO layer mode)
|
||||
// par ... additional data (RLE index table, integer transformation matrix)
|
||||
// Use these functions after layer setup: LayerSetX, LayerSetY, LayerOn
|
||||
void LayerSetup(u8 inx, const u8* img, const sVmode* vmode, u16 w, u16 h, u8 col = 0, const void* par = NULL);
|
||||
|
||||
// setup overlapped layer 1..3 for LAYERMODE_PERSP* modes
|
||||
// inx ... layer index 1..3
|
||||
// img ... pointer to source image data (image width and height must be power of 2)
|
||||
// vmode ... pointer to initialized video configuration
|
||||
// w ... destination image width in pixels (must be multiple of 4)
|
||||
// h ... destination image height
|
||||
// xbits ... number of bits of width of source image
|
||||
// ybits ... number of bits of height of source image
|
||||
// horiz ... horizon of perspective projection/4 (0=no perspective, <0 ceiling)
|
||||
// mat ... integer transformation matrix
|
||||
// col ... key color (needed for LAYERMODE_PERSPKEY layer mode)
|
||||
// Use these functions after layer setup: LayerSetX, LayerSetY, LayerOn
|
||||
void LayerPerspSetup(u8 inx, const u8* img, const sVmode* vmode, u16 w, u16 h, u8 xbits, u8 ybits,
|
||||
s8 horiz, const int* mat, u8 col = 0);
|
||||
|
||||
// setup overlapped layer 1..3 for LAYERMODE_SPRITE* and LAYERMODE_FASTSPRITE* modes
|
||||
// inx ... layer index 1..3
|
||||
// sprite ... pointer to list of sprites (array of pointers to sprites; sorted by X on LAYERMODE_FASTSPRITE* modes)
|
||||
// spritenum ... number of sprites in the list (to turn sprite off, you can set its coordinate Y out of the screen)
|
||||
// vmode ... pointer to initialized video configuration
|
||||
// x ... start coordinate X of area with sprites
|
||||
// y ... start coordinate Y of area with sprites
|
||||
// w ... width of area with sprites (must be multiple of 4)
|
||||
// h ... height of area with sprites
|
||||
// col ... key color (needed for LAYERMODE_SPRITEKEY and LAYERMODE_FASTSPRITEKEY layer mode)
|
||||
// Use function LayerOn after layer setup.
|
||||
void LayerSpriteSetup(u8 inx, sSprite** sprite, u16 spritenum, const sVmode* vmode,
|
||||
s16 x, s16 y, u16 w, u16 h, u8 col = 0);
|
||||
|
||||
// prepare array of start and length of lines (detects transparent pixels)
|
||||
// img ... image
|
||||
// x0 ... array of start of lines
|
||||
// w0 ... array of length of lines
|
||||
// w ... sprite width (slow sprite: max. width 255)
|
||||
// h ... sprite height
|
||||
// wb ... sprite pitch (bytes between lines)
|
||||
// col ... key color
|
||||
// fast ... fast sprite, divide start and length of line by 4
|
||||
void SpritePrepLines(const u8* img, u8* x0, u8* w0, u16 w, u16 h, u16 wb, u8 col, Bool fast);
|
||||
|
||||
// sort fast sprite list by X coordinate
|
||||
void SortSprite(sSprite** list, int num);
|
||||
|
||||
#endif // _VGA_LAYER_H
|
||||
|
|
@ -1,109 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA colors and palettes
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#ifndef _VGA_PAL_H
|
||||
#define _VGA_PAL_H
|
||||
|
||||
// Pack four 8-bit pixel colors into one 32-bit color pattern (used in mode GF_COLOR).
// a lands in bits 0..7, b in bits 8..15, c in bits 16..23 and d in bits 24..31.
// Every operand is widened to u32 before shifting, so moving d into bit 31 is
// an unsigned shift and not a signed overflow.
#define MULTICOL(a,b,c,d) ((u32)(a)|((u32)(b)<<8)|((u32)(c)<<16)|((u32)(d)<<24))
|
||||
|
||||
// CGA colors
|
||||
#define CGACOL_0 0 // 0x000000 black
|
||||
#define CGACOL_1 2 // 0x0000C3 dark blue
|
||||
#define CGACOL_2 20 // 0x00C300 dark green
|
||||
#define CGACOL_3 22 // 0x00C3C3 dark cyan
|
||||
#define CGACOL_4 160 // 0xC30000 dark red
|
||||
#define CGACOL_5 162 // 0xC300C3 dark magenta
|
||||
#define CGACOL_6 168 // 0xC35400 brown
|
||||
#define CGACOL_7 182 // 0xC3C3C3 light gray
|
||||
#define CGACOL_8 73 // 0x545454 dark gray
|
||||
#define CGACOL_9 75 // 0x5454FF light blue
|
||||
#define CGACOL_10 93 // 0x54FF54 light green
|
||||
#define CGACOL_11 95 // 0x54FFFF light cyan
|
||||
#define CGACOL_12 233 // 0xFF5454 light red
|
||||
#define CGACOL_13 235 // 0xFF54FF light magenta
|
||||
#define CGACOL_14 253 // 0xFFFF54 yellow
|
||||
#define CGACOL_15 255 // 0xFFFFFF white
|
||||
|
||||
// ZX Spectrum color
|
||||
#define ZXCOL_0 0 // 0x000000 black
|
||||
#define ZXCOL_1 2 // 0x0000C3 dark blue
|
||||
#define ZXCOL_2 160 // 0xC30000 dark red
|
||||
#define ZXCOL_3 162 // 0xC300C3 dark magenta
|
||||
#define ZXCOL_4 20 // 0x00C300 dark green
|
||||
#define ZXCOL_5 22 // 0x00C3C3 dark cyan
|
||||
#define ZXCOL_6 180 // 0xC3C300 dark yellow
|
||||
#define ZXCOL_7 182 // 0xC3C3C3 light gray
|
||||
#define ZXCOL_8 73 // 0x545454 dark gray
|
||||
#define ZXCOL_9 3 // 0x0000FF light blue
|
||||
#define ZXCOL_10 224 // 0xFF0000 light red
|
||||
#define ZXCOL_11 227 // 0xFF00FF light magenta
|
||||
#define ZXCOL_12 28 // 0x00FF00 light green
|
||||
#define ZXCOL_13 31 // 0x00FFFF light cyan
|
||||
#define ZXCOL_14 252 // 0xFFFF00 yellow
|
||||
#define ZXCOL_15 255 // 0xFFFFFF white
|
||||
|
||||
// Colors
|
||||
// GP0 ... B0 ... VGA B0 blue
|
||||
// GP1 ... B1 ... VGA B1
|
||||
// GP2 ... B2 ... VGA G0 green
|
||||
// GP3 ... B3 ... VGA G1
|
||||
// GP4 ... B4 ... VGA G2
|
||||
// GP5 ... B5 ... VGA R0 red
|
||||
// GP6 ... B6 ... VGA R1
|
||||
// GP7 ... B7 ... VGA R2
|
||||
|
||||
#define COL_BLACK 0
|
||||
|
||||
#define COL_DARKBLUE B0
|
||||
#define COL_SEMIBLUE B1
|
||||
#define COL_BLUE (B0+B1)
|
||||
#define COL_MOREBLUE (COL_BLUE+B3+B6)
|
||||
#define COL_LIGHTBLUE (COL_BLUE+B4+B7)
|
||||
|
||||
#define COL_DARKGREEN B3
|
||||
#define COL_SEMIGREEN B4
|
||||
#define COL_GREEN (B2+B3+B4)
|
||||
#define COL_MOREGREEN (COL_GREEN+B0+B6)
|
||||
#define COL_LIGHTGREEN (COL_GREEN+B1+B7)
|
||||
|
||||
#define COL_DARKRED B6
|
||||
#define COL_SEMIRED B7
|
||||
#define COL_RED (B5+B6+B7)
|
||||
#define COL_MORERED (COL_RED+B0+B3)
|
||||
#define COL_LIGHTRED (COL_RED+B1+B4)
|
||||
|
||||
#define COL_DARKCYAN (B0+B3)
|
||||
#define COL_SEMICYAN (B1+B4)
|
||||
#define COL_CYAN (B0+B1+B2+B3+B4)
|
||||
|
||||
#define COL_DARKMAGENTA (B0+B6)
|
||||
#define COL_SEMIMAGENTA (B1+B7)
|
||||
#define COL_MAGENTA (B0+B1+B5+B6+B7)
|
||||
|
||||
#define COL_DARKYELLOW (B3+B6)
|
||||
#define COL_SEMIYELLOW (B4+B7)
|
||||
#define COL_YELLOW (B2+B3+B4+B5+B6+B7)
|
||||
|
||||
#define COL_GRAY0 0
|
||||
#define COL_GRAY1 (B2+B5)
|
||||
#define COL_GRAY2 (B0+B3+B6)
|
||||
#define COL_GRAY3 (B0+B2+B3+B5+B6)
|
||||
#define COL_GRAY4 (B1+B4+B7)
|
||||
#define COL_GRAY5 (B1+B2+B4+B5+B7)
|
||||
#define COL_GRAY6 (B0+B1+B3+B4+B6+B7)
|
||||
#define COL_GRAY7 (B0+B1+B2+B3+B4+B5+B6+B7)
|
||||
|
||||
#define COL_WHITE COL_GRAY7
|
||||
|
||||
// compose 8-bit color from RGB components (each expected in range 0..255)
// Result layout: bits 5..7 = top 3 bits of r, bits 2..4 = top 3 bits of g,
// bits 0..1 = top 2 bits of b. The blue component is masked like r and g,
// so an out-of-range value cannot spill into the green/red bits.
#define COLRGB(r,g,b) ((u8)(((r)&0xe0)|(((g)&0xe0)>>3)|(((b)&0xc0)>>6)))
|
||||
|
||||
// default 16-color palettes (CGA colors)
|
||||
// - do not set "const", to stay in faster RAM
|
||||
extern u8 DefPal16[16];
|
||||
|
||||
#endif // _VGA_PAL_H
|
||||
|
|
@ -1,313 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA render
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "define.h" // common definitions of C and ASM
|
||||
|
||||
.syntax unified
|
||||
.section .time_critical.Render, "ax"
|
||||
.cpu cortex-m0plus
|
||||
.thumb // use 16-bit instructions
|
||||
|
||||
.extern pScreen // sScreen* pScreen; // pointer to current video screen
|
||||
.extern LineBuf0 // u8 LineBuf0[BLACK_MAX]; // line buffer with black color
|
||||
|
||||
// extern "C" u32* Render(u32* cbuf, u8* dbuf, int line, int pixnum);
|
||||
|
||||
// render scanline
// Finds the video strip that contains the scanline, then walks the segments of that
// strip and dispatches each one by its format:
//  - GF_COLOR (format 0): calls RenderColor directly, output goes to the data buffer
//  - formats up to GF_GRP2MAX: render function from Render_FncAddr writes control buffer entries
//  - formats above GF_GRP2MAX: render function from Render_FncAddr writes to the data buffer
// If pixels remain when the segments are done (or no screen/strip/segment is found),
// one control buffer entry pointing to LineBuf0 is appended for the rest of the line.
|
||||
// cbuf ... control buffer
|
||||
// dbuf ... data buffer (pixel data)
|
||||
// line ... current scanline 0..
|
||||
// pixnum ... total pixels (must be multiple of 4)
|
||||
// Returns new pointer to control buffer
|
||||
|
||||
.thumb_func
|
||||
.global Render
|
||||
Render:
|
||||
|
||||
// push registers
|
||||
push {r4-r7,lr}
|
||||
|
||||
// prepare local variables
|
||||
// SP+0: input argument of render functions
|
||||
// SP+4: R0 control buffer
|
||||
// SP+8: R1 data buffer (pixel data)
|
||||
// SP+12: R2 current scanline 0..
|
||||
// SP+16: R3 total pixels
|
||||
// SP+20: R4
|
||||
// SP+24: R5
|
||||
// SP+28: R6
|
||||
// SP+32: R7
|
||||
// SP+36: LR
|
||||
|
||||
sub sp,#20
|
||||
str r0,[sp,#4] // control buffer
|
||||
str r1,[sp,#8] // data buffer
|
||||
str r3,[sp,#16] // total pixels
|
||||
|
||||
// ---- prepare pointer to current screen
|
||||
// sScreen* s = pScreen;
|
||||
// if (s != NULL) {
|
||||
|
||||
// prepare pointer to current screen
|
||||
ldr r4,Render_pScreenAddr // pointer to pointer to current video Screen (variable pScreen)
|
||||
ldr r4,[r4,#0] // pointer to current video Screen
|
||||
cmp r4,#0 // is pointer valid?
|
||||
beq Render_Clear // pointer is not valid, clear rest of line (display is OFF)
|
||||
|
||||
// ---- find video strip with current scanline
|
||||
// int stripnum = s->num;
|
||||
// sStrip* t = &s->strip[0];
|
||||
// for (; stripnum > 0; stripnum--) {
|
||||
|
||||
// loop through video strips
|
||||
ldrh r5,[r4,#SSCREEN_NUM] // u16 number of video strips
|
||||
tst r5,r5 // check number of video strips
|
||||
beq Render_Clear // no video strips, clear rest of line
|
||||
adds r4,#SSCREEN_STRIP // pointer to first video strip
|
||||
|
||||
// R2 ... current scanline
|
||||
// R4 ... pointer to video strip
|
||||
// R5 ... counter of video strips
|
||||
|
||||
Render_StripLoop:
|
||||
|
||||
// check if current scanline has been found
|
||||
// if (line < t->height) {
|
||||
ldrh r3,[r4,#SSTRIP_HEIGHT] // u16 height of this video strip
|
||||
cmp r2,r3 // check if current scanline fits into this video strip
|
||||
blo Render_StripOK // scanline < strip height, this strip is OK
|
||||
|
||||
// subtract video strip height from scanline number (to be relative to start of strip)
|
||||
// line -= t->height;
|
||||
subs r2,r3 // subtract strip height from scanline number
|
||||
|
||||
// next video strip
|
||||
// t++;
|
||||
// for (; stripnum > 0; stripnum--)
|
||||
adds r4,#SSTRIP_SIZE // shift pointer to next video strip
|
||||
subs r5,#1 // counter of video strips
|
||||
bne Render_StripLoop // next video strip
|
||||
b Render_Clear // video strip not found
|
||||
|
||||
// ---- process all video segments
|
||||
|
||||
Render_StripOK:
|
||||
|
||||
// prepare first video segment
|
||||
// sSegm* g = &t->seg[0];
|
||||
// int segnum = t->num;
|
||||
// for (; segnum > 0; segnum--) {
|
||||
str r2,[sp,#12] // save current scanline (now relative to start of the strip)
|
||||
ldrh r5,[r4,#SSTRIP_NUM] // u16 number of video segments
|
||||
tst r5,r5 // check number of video segments
|
||||
beq Render_Clear // no video segments, clear rest of line
|
||||
adds r4,#SSTRIP_SEG // pointer to first video segment
|
||||
|
||||
// R4 ... pointer to video segment
|
||||
// R5 ... counter of video segments
|
||||
|
||||
Render_SegmLoop:
|
||||
|
||||
// get number of remaining pixels
|
||||
ldr r2,[sp,#16] // get remaining pixels
|
||||
tst r2,r2 // check number of pixels
|
||||
beq Render_Clear // end of scanline, stop rendering
|
||||
|
||||
// get segment width -> R3
|
||||
// int w = g->width;
|
||||
// if (w > pixnum) w = pixnum;
|
||||
// if (w > 0) {
|
||||
ldrh r3,[r4,#SSEGM_WIDTH] // get segment width
|
||||
cmp r3,r2 // check width
|
||||
blo 2f // width is OK
|
||||
mov r3,r2 // limit width by total width
|
||||
2: tst r3,r3 // check width
|
||||
beq Render_SegmNext // this segment is invisible, skip it
|
||||
|
||||
// update remaining pixels
|
||||
// pixnum -= w;
|
||||
subs r2,r3 // decrease remaining width
|
||||
str r2,[sp,#16] // store new remaining pixels
|
||||
|
||||
// get Y coordinate -> R2
|
||||
// int y = g->offy + line;
|
||||
ldrh r2,[r4,#SSEGM_OFFY] // get offset at Y direction
|
||||
sxth r2,r2 // expand to signed
|
||||
ldr r1,[sp,#12] // get current scanline
|
||||
add r2,r1 // add Y offset and current scanline
|
||||
|
||||
// double lines
|
||||
// if (g->dbly) y /= 2;
|
||||
ldrb r1,[r4,#SSEGM_DBLY] // get dbly flag
|
||||
tst r1,r1 // is dbly flag set?
|
||||
beq 2f // dbly flag not set
|
||||
asrs r2,#1 // Y coordinate / 2 (arithmetic shift, sign is kept)
|
||||
|
||||
// wrap Y coordinate
|
||||
// int wy = g->wrapy;
|
||||
// while (y < 0) y += wy;
|
||||
// while (y >= wy) y -= wy;
// Implemented as: subtract wrapy until the result is negative, then add it back
// until the result is non-negative.
// NOTE: wrapy must be non-zero - with wrapy == 0 one of the two loops never terminates.
|
||||
2: ldrh r1,[r4,#SSEGM_WRAPY] // get wrapy
|
||||
3: subs r2,r1 // subtract wrapy
|
||||
bpl 3b // repeat
|
||||
4: adds r2,r1 // add wrapy
|
||||
bmi 4b // repeat
|
||||
|
||||
// get X coordinate -> R1
|
||||
// int x = g->offx;
|
||||
6: ldrh r1,[r4,#SSEGM_OFFX] // get offset at X direction
|
||||
sxth r1,r1 // expand to signed
|
||||
|
||||
// wrap X coordinate
|
||||
// int wx = g->wrapx;
|
||||
// while (x < 0) x += wx;
|
||||
// while (x >= wx) x -= wx;
// Same subtract-then-add-back scheme as for Y.
// NOTE: wrapx must be non-zero - with wrapx == 0 one of the two loops never terminates.
|
||||
ldrh r0,[r4,#SSEGM_WRAPX] // get wrapx
|
||||
3: subs r1,r0 // subtract wrapx
|
||||
bpl 3b // repeat
|
||||
4: adds r1,r0 // add wrapx
|
||||
bmi 4b // repeat
|
||||
|
||||
// ---- process 1st format group: GF_COLOR
|
||||
|
||||
// get format -> R0
|
||||
6: ldrb r0,[r4,#SSEGM_FORM] // get current format
|
||||
|
||||
// serve format GF_COLOR
|
||||
tst r0,r0 // format GF_COLOR ?
|
||||
bne 7f // no
|
||||
|
||||
// u32 par = ((y & 1) == 0) ? g->par : g->par2
|
||||
lsrs r2,#1 // check bit 0 of Y coordinate (shifted out into carry flag)
|
||||
ldr r1,[r4,#SSEGM_PAR] // get par for even line
|
||||
bcc 2f // even line
|
||||
ldr r1,[r4,#SSEGM_PAR2] // get par2 for odd line
|
||||
|
||||
// *cbuf++ = w/4; // number of pixels/4
|
||||
2: lsrs r2,r3,#2 // width/4
|
||||
ldr r6,[sp,#4] // get pointer to control buffer
|
||||
stmia r6!,{r2} // store width/4
|
||||
|
||||
// *cbuf++ = (u32)dbuf; // pointer to data buffer
|
||||
ldr r0,[sp,#8] // get pointer to data buffer
|
||||
stmia r6!,{r0} // store pointer to data
|
||||
str r6,[sp,#4] // save new pointer to control buffer
|
||||
|
||||
// dbuf = RenderColor(dbuf, par, w/4);
|
||||
bl RenderColor
|
||||
str r0,[sp,#8] // store new pointer to data buffer
|
||||
b Render_SegmNext
|
||||
|
||||
// ---- process 2nd format group: using control buffer cbuf
|
||||
|
||||
// prepare input argument video segment -> [SP+0]
|
||||
7: str r4,[sp,#0] // prepare 5th argument (passed on the stack) - current video segment
|
||||
|
||||
// prepare function address -> R7
|
||||
adr r7,Render_FncAddr // get address of jump table
|
||||
lsls r6,r0,#2 // format * 4
|
||||
ldr r7,[r7,r6] // load function address -> R7
|
||||
|
||||
// check 2nd format group
|
||||
cmp r0,#GF_GRP2MAX // check 2nd format group
|
||||
bhi 2f // > 2nd group
|
||||
|
||||
// cbuf = RenderGraph8(cbuf, x, y, w, g);
|
||||
ldr r0,[sp,#4] // get pointer to control buffer
|
||||
blx r7 // call render function
|
||||
str r0,[sp,#4] // save new pointer to control buffer
|
||||
b Render_SegmNext
|
||||
|
||||
// ---- process 3rd format group: using data buffer dbuf
|
||||
|
||||
// *cbuf++ = w/4; // number of pixels/4
|
||||
2: lsrs r0,r3,#2 // width/4
|
||||
ldr r6,[sp,#4] // get pointer to control buffer
|
||||
stmia r6!,{r0} // store width/4
|
||||
|
||||
// *cbuf++ = (u32)dbuf; // pointer to data buffer
|
||||
ldr r0,[sp,#8] // get pointer to data buffer
|
||||
stmia r6!,{r0} // store pointer to data
|
||||
str r6,[sp,#4] // save new pointer to control buffer
|
||||
|
||||
// dbuf = <3rd group render function>(dbuf, x, y, w, g); // function address taken from jump table
|
||||
blx r7 // call render function
|
||||
str r0,[sp,#8] // store new pointer to data buffer
|
||||
|
||||
Render_SegmNext:
|
||||
|
||||
// next video segment
|
||||
adds r4,#SSEGM_SIZE // shift pointer to next video segment
|
||||
subs r5,#1 // counter of video segments
|
||||
bne Render_SegmLoop // next video segment
|
||||
|
||||
// ---- clear rest of line, write pointer to control buffer
|
||||
|
||||
Render_Clear:
|
||||
|
||||
// return current control buffer
|
||||
ldr r0,[sp,#4] // control buffer
|
||||
|
||||
// check if some pixels left
|
||||
ldr r1,[sp,#16] // number of remaining pixels
|
||||
lsrs r1,#2 // number of pixels/4 (= number of 4-pixels)
|
||||
beq 9f // no pixels left
|
||||
|
||||
// write size and address to control buffer
|
||||
ldr r2,Render_LineBuf0Addr // data buffer with black color
|
||||
stmia r0!,{r1,r2} // write number of 4-pixels and pointer to data buffer to control buffer
|
||||
|
||||
// pop registers and return (return control buffer in r0)
|
||||
9: add sp,#20
|
||||
pop {r4-r7,pc}
|
||||
|
||||
.align 2
|
||||
|
||||
// pointer to pointer with current video screen
|
||||
Render_pScreenAddr:
|
||||
.word pScreen
|
||||
|
||||
// pointer to buffer with black color
|
||||
Render_LineBuf0Addr:
|
||||
.word LineBuf0
|
||||
|
||||
// pointers to render functions
|
||||
Render_FncAddr:
|
||||
// 1st format group
|
||||
.word RenderColor // GF_COLOR simple color (par=color pattern 4-pixels even line, par2=color pattern 4-pixels odd line)
|
||||
|
||||
// 2nd format group
|
||||
.word RenderGraph8 // GF_GRAPH8 native 8-bit graphics (X1Y1R2G2B2) - fast, transfers "as is" to PIO
|
||||
.word RenderTile // GF_TILE tiles
|
||||
.word RenderTile2 // GF_TILE2 alternate tiles
|
||||
.word RenderProgress // GF_PROGRESS horizontal progress indicator
|
||||
.word RenderGrad1 // render gradient with 1 line GF_GRAD1
|
||||
.word RenderGrad2 // render gradient with 2 lines GF_GRAD2
|
||||
|
||||
// 3rd format group
|
||||
.word RenderGraph4 // GF_GRAPH4 4-bit graphics
|
||||
.word RenderGraph2 // GF_GRAPH2 2-bit graphics
|
||||
.word RenderGraph1 // GF_GRAPH1 1-bit graphics
|
||||
.word RenderMText // GF_MTEXT 8-pixel mono text
|
||||
.word RenderAText // GF_ATEXT 8-pixel attribute text, character + 2x4 bit attributes
|
||||
.word RenderFText // GF_FTEXT 8-pixel foreground color text, character + foreground color
|
||||
.word RenderCText // GF_CTEXT 8-pixel color text, character + background color + foreground color
|
||||
.word RenderGText // GF_GTEXT 8-pixel gradient text (par = pointer to 1-bit font, par2 = pointer to color array)
|
||||
.word RenderDText // GF_DTEXT 8-pixel double gradient text (par = pointer to 1-bit font, par2 = pointer to color array)
|
||||
.word RenderLevel // GF_LEVEL level graph
|
||||
.word RenderLevelGrad // GF_LEVELGRAD level gradient graph
|
||||
.word RenderOscil // GF_OSCIL oscilloscope pixel graph
|
||||
.word RenderOscLine // GF_OSCLINE oscilloscope line graph
|
||||
.word RenderPlane2 // GF_PLANE2 4 colors on 2 graphic planes
|
||||
.word RenderAttrib8 // GF_ATTRIB8 2x4 bit color attribute per 8x8 pixel sample
|
||||
.word RenderGraph8Mat // GF_GRAPH8MAT 8-bit graphics with 2D matrix transformation
|
||||
.word RenderGraph8Persp // GF_GRAPH8PERSP 8-bit graphics with perspective projection
|
||||
.word RenderTilePersp // GF_TILEPERSP tiles with perspective
|
||||
.word RenderTilePersp15 // GF_TILEPERSP15 tiles with perspective, 1.5 pixels
|
||||
.word RenderTilePersp2 // GF_TILEPERSP2 tiles with perspective, double pixels
|
||||
.word RenderTilePersp3 // GF_TILEPERSP3 tiles with perspective, triple pixels
|
||||
.word RenderTilePersp4 // GF_TILEPERSP4 tiles with perspective, quadruple pixels
|
||||
|
|
@ -1,707 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA screen layout
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "include.h"
|
||||
|
||||
// current video screen
|
||||
sScreen Screen = { .num = 0 }; // default video screen
|
||||
sScreen* pScreen = &Screen; // pointer to current video screen
|
||||
|
||||
// clear screen (set 0 strips, does not modify sprites)
|
||||
void ScreenClear(sScreen* s)
|
||||
{
|
||||
__dmb();
|
||||
s->num = 0;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// add empty strip to the screen (returns pointer to the strip)
|
||||
sStrip* ScreenAddStrip(sScreen* s, int height)
|
||||
{
|
||||
int n = s->num;
|
||||
sStrip* t = &s->strip[n];
|
||||
t->height = height;
|
||||
t->num = 0;
|
||||
__dmb();
|
||||
s->num = n + 1;
|
||||
__dmb();
|
||||
return t;
|
||||
}
|
||||
|
||||
// add empty segment to video strip (returns pointer to the segment and initialises is to defaults)
|
||||
sSegm* ScreenAddSegm(sStrip* strip, int width)
|
||||
{
|
||||
int n = strip->num;
|
||||
sSegm* g = &strip->seg[n];
|
||||
g->width = width;
|
||||
g->wb = width;
|
||||
g->offx = 0;
|
||||
g->offy = 0;
|
||||
g->wrapx = width;
|
||||
g->wrapy = strip->height;
|
||||
g->data = NULL;
|
||||
g->form = GF_COLOR;
|
||||
g->dbly = false;
|
||||
g->par = 0;
|
||||
g->par2 = 0;
|
||||
__dmb();
|
||||
strip->num = n + 1;
|
||||
__dmb();
|
||||
return g;
|
||||
}
|
||||
|
||||
// set video segment to simple color format GF_COLOR
|
||||
// col1 = color pattern 4-pixels even line (use macro MULTICOL)
|
||||
// col2 = color pattern 4-pixels odd line (use macro MULTICOL)
|
||||
void ScreenSegmColor(sSegm* segm, u32 col1, u32 col2)
|
||||
{
|
||||
segm->par = col1;
|
||||
segm->par2 = col2;
|
||||
__dmb();
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to gradient with 1 line
|
||||
// data = pointer to data buffer with gradient
|
||||
// wb = pitch - length of buffer
|
||||
// To scroll gradient, set virtual dimension wrapx, then shift offx
|
||||
void ScreenSegmGrad1(sSegm* segm, const void* data, int wb)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->wb = wb;
|
||||
__dmb();
|
||||
segm->form = GF_GRAD1;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to gradient with 2 lines
|
||||
// data = pointer to data buffer with gradient
|
||||
// wb = pitch - lenght of buffer
|
||||
// To scroll gradient, set virtual dimension wrapx, then shift offx
|
||||
void ScreenSegmGrad2(sSegm* segm, const void* data, int wb)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->wb = wb;
|
||||
__dmb();
|
||||
segm->form = GF_GRAD2;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to native 8-bit graphics (R3G3B2)
|
||||
// data = pointer to data buffer
|
||||
// wb = pitch - number of bytes between lines
|
||||
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
|
||||
void ScreenSegmGraph8(sSegm* segm, const void* data, int wb)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->wb = wb;
|
||||
__dmb();
|
||||
segm->form = GF_GRAPH8;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// generate 16-color palette translation table for functions ScreenSegmGraph4
|
||||
// trans = pointer to destination palette translation table (u16 trans[256])
|
||||
// pal = pointer to source palette of 16 colors (u8 pal[16])
|
||||
void GenPal16Trans(u16* trans, const u8* pal)
|
||||
{
|
||||
int i, j;
|
||||
u16 k;
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
j = (i >> 4) & 0x0f;
|
||||
k = pal[j];
|
||||
|
||||
j = i & 0x0f;
|
||||
k |= (u16)pal[j] << 8;
|
||||
|
||||
trans[i] = k;
|
||||
}
|
||||
}
|
||||
|
||||
// set video segment to 4-bit palette graphics
|
||||
// data = pointer to data buffer
|
||||
// trans = pointer to 16-color palette translation table (generated with GenPal16Trans function)
|
||||
// wb = pitch - number of bytes between lines
|
||||
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
|
||||
void ScreenSegmGraph4(sSegm* segm, const void* data, const void* trans, int wb)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = (u32)trans;
|
||||
segm->wb = wb;
|
||||
__dmb();
|
||||
segm->form = GF_GRAPH4;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// generate palette 4 translation table for functions ScreenSegmGraph2
|
||||
// trans = pointer to destination palette translation table (u32 trans[256])
|
||||
// pal = pointer to source palette of 4 colors (u8 pal[4])
|
||||
void GenPal4Trans(u32* trans, const u8* pal)
|
||||
{
|
||||
int i, j;
|
||||
u32 k;
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
j = (i >> 6) & 0x03;
|
||||
k = pal[j];
|
||||
|
||||
j = (i >> 4) & 0x03;
|
||||
k |= (u32)pal[j] << 8;
|
||||
|
||||
j = (i >> 2) & 0x03;
|
||||
k |= (u32)pal[j] << 16;
|
||||
|
||||
j = i & 0x03;
|
||||
k |= (u32)pal[j] << 24;
|
||||
|
||||
trans[i] = k;
|
||||
}
|
||||
}
|
||||
|
||||
// set video segment to 2-bit palette graphics
|
||||
// data = pointer to data buffer
|
||||
// trans = pointer to 4-color palette translation table (generated with GenPal4Trans function)
|
||||
// wb = pitch - number of bytes between lines
|
||||
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
|
||||
void ScreenSegmGraph2(sSegm* segm, const void* data, const void* trans, int wb)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = (u32)trans;
|
||||
segm->wb = wb;
|
||||
__dmb();
|
||||
segm->form = GF_GRAPH2;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to 1-bit palette graphics
|
||||
// data = pointer to data buffer
|
||||
// bg = background color
|
||||
// fg = foreground color
|
||||
// wb = pitch - number of bytes between lines
|
||||
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
|
||||
void ScreenSegmGraph1(sSegm* segm, const void* data, u8 bg, u8 fg, int wb)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = bg | ((u32)fg << 8);
|
||||
segm->wb = wb;
|
||||
__dmb();
|
||||
segm->form = GF_GRAPH1;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to 8-pixel mono text
|
||||
// data = pointer to text buffer
|
||||
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
|
||||
// fontheight = font height
|
||||
// bg = background color
|
||||
// fg = foreground color
|
||||
// wb = pitch - number of bytes between text lines
|
||||
void ScreenSegmMText(sSegm* segm, const void* data, const void* font, u16 fontheight, u8 bg, u8 fg, int wb)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = (u32)font;
|
||||
segm->par2 = bg | ((u32)fg << 8);
|
||||
segm->par3 = fontheight;
|
||||
segm->wb = wb;
|
||||
__dmb();
|
||||
segm->form = GF_MTEXT;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to 8-pixel attribute text
|
||||
// data = pointer to text buffer (character + 2x4 bit attributes)
|
||||
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
|
||||
// fontheight = font height
|
||||
// pal = pointer to palette of 16 colors
|
||||
// wb = pitch - number of bytes between text lines
|
||||
void ScreenSegmAText(sSegm* segm, const void* data, const void* font, u16 fontheight, const void* pal, int wb)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = (u32)font;
|
||||
segm->par2 = (u32)pal;
|
||||
segm->par3 = fontheight;
|
||||
segm->wb = wb;
|
||||
__dmb();
|
||||
segm->form = GF_ATEXT;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to 8-pixel foreground color text
|
||||
// data = pointer to text buffer (character + foreground color)
|
||||
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
|
||||
// fontheight = font height
|
||||
// bg = background color
|
||||
// wb = pitch - number of bytes between text lines
|
||||
void ScreenSegmFText(sSegm* segm, const void* data, const void* font, u16 fontheight, u8 bg, int wb)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = (u32)font;
|
||||
segm->par2 = bg;
|
||||
segm->par3 = fontheight;
|
||||
segm->wb = wb;
|
||||
__dmb();
|
||||
segm->form = GF_FTEXT;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to 8-pixel color text
|
||||
// data = pointer to text buffer (character + background color + foreground color)
|
||||
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
|
||||
// fontheight = font height
|
||||
// wb = pitch - number of bytes between text lines
|
||||
void ScreenSegmCText(sSegm* segm, const void* data, const void* font, u16 fontheight, int wb)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = (u32)font;
|
||||
segm->par3 = fontheight;
|
||||
segm->wb = wb;
|
||||
__dmb();
|
||||
segm->form = GF_CTEXT;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to 8-pixel gradient color text
|
||||
// data = pointer to text buffer (character + foreground color)
|
||||
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
|
||||
// fontheight = font height
|
||||
// bg = background color
|
||||
// grad = pointer to array of gradient colors
|
||||
// wb = pitch - number of bytes between text lines
|
||||
void ScreenSegmGText(sSegm* segm, const void* data, const void* font, u8 fontheight, u8 bg, const void* grad, int wb)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = (u32)font;
|
||||
segm->par3 = bg | (fontheight << 8);
|
||||
segm->par2 = (u32)grad;
|
||||
segm->wb = wb;
|
||||
__dmb();
|
||||
segm->form = GF_GTEXT;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to 8-pixel double gradient color text
|
||||
// data = pointer to text buffer (character + foreground color)
|
||||
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
|
||||
// fontheight = font height
|
||||
// bg = background color
|
||||
// grad = pointer to array of gradient colors
|
||||
// wb = pitch - number of bytes between text lines
|
||||
void ScreenSegmDText(sSegm* segm, const void* data, const void* font, u8 fontheight, u8 bg, const void* grad, int wb)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = (u32)font;
|
||||
segm->par3 = bg | (fontheight << 8);
|
||||
segm->par2 = (u32)grad;
|
||||
segm->wb = wb;
|
||||
__dmb();
|
||||
segm->form = GF_DTEXT;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to tiles
|
||||
// data = pointer to tile map buffer (with tile indices)
|
||||
// tiles = pointer to 1 column of tiles, 1 pixel = 8 bits
|
||||
// w = tile width (must be multiple of 4)
|
||||
// h = tile height
|
||||
// wb = pitch - number of bytes between tile map rows
|
||||
void ScreenSegmTile(sSegm* segm, const void* data, const void* tiles, int w, int h, int wb)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = (u32)tiles;
|
||||
segm->par2 = (u32)h;
|
||||
segm->par3 = (u16)w;
|
||||
segm->wb = wb;
|
||||
segm->wrapx = (segm->width+w-1)/w*w;
|
||||
segm->wrapy = (segm->wrapy+h-1)/h*h;
|
||||
__dmb();
|
||||
segm->form = GF_TILE;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to alternate tiles
|
||||
// data = pointer to tile map buffer (with tile indices)
|
||||
// tiles = pointer to 1 row of tiles, 1 pixel = 8 bits
|
||||
// w = tile width (must be multiple of 4)
|
||||
// h = tile height
|
||||
// tilewb = tile width bytes (usually tile width * number of tiles)
|
||||
// wb = pitch - number of bytes between tile map rows
|
||||
void ScreenSegmTile2(sSegm* segm, const void* data, const void* tiles, int w, int h, int tilewb, int wb)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = (u32)tiles;
|
||||
segm->par2 = (u32)h + ((u32)(u16)tilewb << 16);
|
||||
segm->par3 = (u16)w;
|
||||
segm->wb = wb;
|
||||
segm->wrapx = (segm->width+w-1)/w*w;
|
||||
segm->wrapy = (segm->wrapy+h-1)/h*h;
|
||||
__dmb();
|
||||
segm->form = GF_TILE2;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to level graph GF_LEVEL
|
||||
// data = pointer to buffer with line samples 0..255
|
||||
// bg = background color
|
||||
// fg = foreground color
|
||||
// zero = Y zero level
|
||||
void ScreenSegmLevel(sSegm* segm, const void* data, u8 bg, u8 fg, u8 zero)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = bg | ((u32)fg << 8);
|
||||
segm->par2 = zero;
|
||||
__dmb();
|
||||
segm->form = GF_LEVEL;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to leve gradient graph GF_LEVELGRAD
|
||||
// data = pointer to buffer with values 0..255 of 4-pixels in rows
|
||||
// sample1 = scanline sample < data
|
||||
// sample2 = scanline sample >= data
|
||||
void ScreenSegmLevelGrad(sSegm* segm, const void* data, const void* sample1, const void* sample2)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = (u32)sample1;
|
||||
segm->par2 = (u32)sample2;
|
||||
__dmb();
|
||||
segm->form = GF_LEVELGRAD;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to oscilloscope 1-pixel graph GF_OSCIL
|
||||
// data = pointer to buffer with line samples 0..255
|
||||
// bg = background color
|
||||
// fg = foreground color
|
||||
// pixh = height of pixels - 1
|
||||
void ScreenSegmOscil(sSegm* segm, const void* data, u8 bg, u8 fg, int pixh)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = bg | ((u32)fg << 8);
|
||||
segm->par2 = pixh;
|
||||
__dmb();
|
||||
segm->form = GF_OSCIL;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to oscilloscope line graph GF_OSCLINE
|
||||
// data = pointer to buffer with line samples 0..255
|
||||
// bg = background color
|
||||
// fg = foreground color
|
||||
void ScreenSegmOscLine(sSegm* segm, const void* data, u8 bg, u8 fg)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = bg | ((u32)fg << 8);
|
||||
__dmb();
|
||||
segm->form = GF_OSCLINE;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// generate palette 4-planes translation table for function ScreenSegmPlane2
|
||||
// trans = pointer to destination palette translation table (u32 trans[256])
|
||||
// pal = pointer to source palette of 4 colors (u8 pal[4])
|
||||
void GenPal4Plane(u32* trans, const u8* pal)
|
||||
{
|
||||
int i, j;
|
||||
u32 k;
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
j = 0;
|
||||
if ((i & B7) != 0) j |= B1;
|
||||
if ((i & B3) != 0) j |= B0;
|
||||
k = pal[j];
|
||||
|
||||
j = 0;
|
||||
if ((i & B6) != 0) j |= B1;
|
||||
if ((i & B2) != 0) j |= B0;
|
||||
k |= (u32)pal[j] << 8;
|
||||
|
||||
j = 0;
|
||||
if ((i & B5) != 0) j |= B1;
|
||||
if ((i & B1) != 0) j |= B0;
|
||||
k |= (u32)pal[j] << 16;
|
||||
|
||||
j = 0;
|
||||
if ((i & B4) != 0) j |= B1;
|
||||
if ((i & B0) != 0) j |= B0;
|
||||
k |= (u32)pal[j] << 24;
|
||||
|
||||
trans[i] = k;
|
||||
}
|
||||
}
|
||||
|
||||
// set video segment to 4-color on 2-planes graphics
|
||||
// data = pointer to data buffer
|
||||
// plane = offset of 2nd graphics plane (in bytes), size of one graphics plane
|
||||
// trans = pointer to 4-color palette translation table (generated with GenPal4Plane function)
|
||||
// wb = pitch - number of bytes between lines
|
||||
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
|
||||
void ScreenSegmPlane2(sSegm* segm, const void* data, int plane, const void* trans, int wb)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = plane;
|
||||
segm->par2 = (u32)trans;
|
||||
segm->wb = wb;
|
||||
__dmb();
|
||||
segm->form = GF_PLANE2;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to 2x4 bit color attribute per 8x8 pixel sample graphics
|
||||
// data = pointer to data buffer with mono pixels
|
||||
// attr = pointer to color attributes
|
||||
// pal = pointer to 16-color palette table
|
||||
// wb = pitch - number of bytes between lines
|
||||
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
|
||||
void ScreenSegmAttrib8(sSegm* segm, const void* data, const void* attr, const u8* pal, int wb)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = (u32)attr;
|
||||
segm->par2 = (u32)pal;
|
||||
segm->wb = wb;
|
||||
__dmb();
|
||||
segm->form = GF_ATTRIB8;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to horizontal progress indicator GF_PROGRESS
|
||||
// data = pointer to buffer with values 0..255 of 4-pixels in rows
|
||||
// sample1 = scanline sample < data
|
||||
// sample2 = scanline sample >= data
|
||||
void ScreenSegmProgress(sSegm* segm, const void* data, const void* sample1, const void* sample2)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->par = (u32)sample1;
|
||||
segm->par2 = (u32)sample2;
|
||||
__dmb();
|
||||
segm->form = GF_PROGRESS;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to 8-bit graphics with 2D matrix transformation
|
||||
// data = pointer to image data (width and height of image must be power of 2)
|
||||
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
|
||||
// xbits = number of bits of image width (image width must be power of 2 and must be = pitch width bytes)
|
||||
// ybits = number of bits of image height (image height must be power of 2)
|
||||
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
|
||||
void ScreenSegmGraph8Mat(sSegm* segm, const void* data, const int* mat, u16 xbits, u16 ybits)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->wb = (1<<xbits);
|
||||
segm->offx = 0;
|
||||
segm->offy = 0;
|
||||
segm->wrapx = segm->width;
|
||||
segm->par = (u32)mat;
|
||||
segm->par2 = xbits | ((u32)ybits << 16);
|
||||
__dmb();
|
||||
segm->form = GF_GRAPH8MAT;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to 8-bit graphics with perspective projection
|
||||
// data = pointer to image data (width and height of image must be power of 2)
|
||||
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
|
||||
// xbits = number of bits of image width (image width must be power of 2 and must be = pitch width bytes)
|
||||
// ybits = number of bits of image height (image height must be power of 2)
|
||||
// horiz = horizon offset
|
||||
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
|
||||
void ScreenSegmGraph8Persp(sSegm* segm, const void* data, const int* mat, u16 xbits, u16 ybits, u16 horiz)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = data;
|
||||
segm->wb = (1<<xbits);
|
||||
segm->offx = 0;
|
||||
segm->offy = 0;
|
||||
segm->wrapx = segm->width;
|
||||
segm->par = (u32)mat;
|
||||
segm->par2 = xbits | ((u32)ybits << 16);
|
||||
segm->par3 = horiz;
|
||||
__dmb();
|
||||
segm->form = GF_GRAPH8PERSP;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to tiles with perspective
|
||||
// map = pointer to tile map with tile indices (width and height must be power of 2)
|
||||
// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
|
||||
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
|
||||
// mapwbits = number of bits of tile map width
|
||||
// maphbits = number of bits of tile map height
|
||||
// tilebits = number of bits of tile width and height
|
||||
// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
|
||||
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
|
||||
void ScreenSegmTilePersp(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
|
||||
u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = map;
|
||||
segm->wb = mapwbits | ((u16)maphbits<<8);
|
||||
segm->offx = 0;
|
||||
segm->offy = 0;
|
||||
segm->wrapx = segm->width;
|
||||
segm->par = (u32)tiles;
|
||||
segm->par2 = (u32)mat;
|
||||
segm->par3 = tilebits | ((u16)horizon<<8);
|
||||
__dmb();
|
||||
segm->form = GF_TILEPERSP;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to tiles with perspective, 1.5 pixels
|
||||
// map = pointer to tile map with tile indices (width and height must be power of 2)
|
||||
// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
|
||||
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
|
||||
// mapwbits = number of bits of tile map width
|
||||
// maphbits = number of bits of tile map height
|
||||
// tilebits = number of bits of tile width and height
|
||||
// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
|
||||
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
|
||||
void ScreenSegmTilePersp15(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
|
||||
u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = map;
|
||||
segm->wb = mapwbits | ((u16)maphbits<<8);
|
||||
segm->offx = 0;
|
||||
segm->offy = 0;
|
||||
segm->wrapx = segm->width;
|
||||
segm->par = (u32)tiles;
|
||||
segm->par2 = (u32)mat;
|
||||
segm->par3 = tilebits | ((u16)horizon<<8);
|
||||
__dmb();
|
||||
segm->form = GF_TILEPERSP15;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to tiles with perspective, double pixels
|
||||
// map = pointer to tile map with tile indices (width and height must be power of 2)
|
||||
// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
|
||||
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
|
||||
// mapwbits = number of bits of tile map width
|
||||
// maphbits = number of bits of tile map height
|
||||
// tilebits = number of bits of tile width and height
|
||||
// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
|
||||
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
|
||||
void ScreenSegmTilePersp2(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
|
||||
u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = map;
|
||||
segm->wb = mapwbits | ((u16)maphbits<<8);
|
||||
segm->offx = 0;
|
||||
segm->offy = 0;
|
||||
segm->wrapx = segm->width;
|
||||
segm->par = (u32)tiles;
|
||||
segm->par2 = (u32)mat;
|
||||
segm->par3 = tilebits | ((u16)horizon<<8);
|
||||
__dmb();
|
||||
segm->form = GF_TILEPERSP2;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to tiles with perspective, triple pixels
|
||||
// map = pointer to tile map with tile indices (width and height must be power of 2)
|
||||
// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
|
||||
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
|
||||
// mapwbits = number of bits of tile map width
|
||||
// maphbits = number of bits of tile map height
|
||||
// tilebits = number of bits of tile width and height
|
||||
// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
|
||||
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
|
||||
void ScreenSegmTilePersp3(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
|
||||
u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = map;
|
||||
segm->wb = mapwbits | ((u16)maphbits<<8);
|
||||
segm->offx = 0;
|
||||
segm->offy = 0;
|
||||
segm->wrapx = segm->width;
|
||||
segm->par = (u32)tiles;
|
||||
segm->par2 = (u32)mat;
|
||||
segm->par3 = tilebits | ((u16)horizon<<8);
|
||||
__dmb();
|
||||
segm->form = GF_TILEPERSP3;
|
||||
__dmb();
|
||||
}
|
||||
|
||||
// set video segment to tiles with perspective, quadruple pixels
|
||||
// map = pointer to tile map with tile indices (width and height must be power of 2)
|
||||
// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
|
||||
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
|
||||
// mapwbits = number of bits of tile map width
|
||||
// maphbits = number of bits of tile map height
|
||||
// tilebits = number of bits of tile width and height
|
||||
// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
|
||||
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
|
||||
void ScreenSegmTilePersp4(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
|
||||
u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon)
|
||||
{
|
||||
segm->form = GF_COLOR;
|
||||
__dmb();
|
||||
segm->data = map;
|
||||
segm->wb = mapwbits | ((u16)maphbits<<8);
|
||||
segm->offx = 0;
|
||||
segm->offy = 0;
|
||||
segm->wrapx = segm->width;
|
||||
segm->par = (u32)tiles;
|
||||
segm->par2 = (u32)mat;
|
||||
segm->par3 = tilebits | ((u16)horizon<<8);
|
||||
__dmb();
|
||||
segm->form = GF_TILEPERSP4;
|
||||
__dmb();
|
||||
}
|
||||
|
|
@ -1,307 +0,0 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA screen layout
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#ifndef _VGA_SCREEN_H
|
||||
#define _VGA_SCREEN_H
|
||||
|
||||
// video segment (on change update SSEGM_* in define.h)
// Describes one horizontal section of a video strip: its size, scrolling state,
// source data and graphics format. The field order is mirrored by the SSEGM_*
// constants, so fields must not be reordered or resized without updating them.
// The meaning of par, par2 and par3 depends on the graphics format; they are
// filled in by the ScreenSegm* setup functions.
|
||||
typedef struct {
|
||||
u16 width; // SSEGM_WIDTH width of this video segment in pixels (must be multiple of 4, 0=inactive segment)
|
||||
u16 wb; // SSEGM_WB pitch - number of bytes between lines
|
||||
s16 offx; // SSEGM_OFFX display offset at X direction (must be multiple of 4)
|
||||
s16 offy; // SSEGM_OFFY display offset at Y direction
|
||||
u16 wrapx; // SSEGM_WRAPX wrap width in X direction (number of pixels, must be a multiple of 4 and > 0)
|
||||
// text modes: wrapx must be a multiple of 8
|
||||
u16 wrapy; // SSEGM_WRAPY wrap width in Y direction (number of lines, cannot be 0)
|
||||
const void* data; // SSEGM_DATA pointer to video buffer with image data
|
||||
u8 form; // SSEGM_FORM graphics format GF_*
|
||||
bool dbly; // SSEGM_DBLY double Y (2 scanlines per 1 image line)
|
||||
u16 par3; // SSEGM_PAR3 parameter 3
|
||||
u32 par; // SSEGM_PAR parameter 1
|
||||
u32 par2; // SSEGM_PAR2 parameter 2
|
||||
} sSegm;
|
||||
|
||||
// video strip (on change update SSTRIP_* in define.h)
// A horizontal band of scanlines holding a list of video segments.
// The field order is mirrored by the SSTRIP_* constants, so fields must not be
// reordered or resized without updating them.
|
||||
typedef struct {
|
||||
u16 height; // SSTRIP_HEIGHT height of this strip in number of scanlines
|
||||
u16 num; // SSTRIP_NUM number of video segments
|
||||
sSegm seg[SEGMAX]; // SSTRIP_SEG list of video segments
|
||||
} sStrip;
|
||||
|
||||
// video screen (on change update SSCREEN_* in define.h)
// Top-level screen layout: a list of video strips.
// The field order is mirrored by the SSCREEN_* constants, so fields must not be
// reordered or resized without updating them.
|
||||
typedef struct {
|
||||
u16 num; // SSCREEN_NUM number of video strips
|
||||
u16 backup; // SSCREEN_BACKUP backup number of video strips during display OFF
|
||||
sStrip strip[STRIPMAX]; // SSCREEN_STRIP list of video strips
|
||||
} sScreen;
|
||||
|
||||
// current video screen
|
||||
extern sScreen Screen; // default video screen
|
||||
extern sScreen* pScreen; // pointer to current video screen
|
||||
|
||||
// clear screen (set 0 strips, does not modify sprites)
|
||||
void ScreenClear(sScreen* s);
|
||||
|
||||
// add empty strip to the screen (returns pointer to the strip)
|
||||
sStrip* ScreenAddStrip(sScreen* s, int height);
|
||||
|
||||
// add empty segment to video strip (returns pointer to the segment and initialises it to defaults)
|
||||
sSegm* ScreenAddSegm(sStrip* strip, int width);
|
||||
|
||||
// set video segment to simple color format GF_COLOR
|
||||
// col1 = color pattern 4-pixels even line (use macro MULTICOL)
|
||||
// col2 = color pattern 4-pixels odd line (use macro MULTICOL)
|
||||
void ScreenSegmColor(sSegm* segm, u32 col1, u32 col2);
|
||||
|
||||
// set video segment to gradient with 1 line
|
||||
// data = pointer to data buffer with gradient
|
||||
// wb = pitch - length of buffer
|
||||
// To scroll gradient, set virtual dimension wrapx, then shift offx
|
||||
void ScreenSegmGrad1(sSegm* segm, const void* data, int wb);
|
||||
|
||||
// set video segment to gradient with 2 lines
|
||||
// data = pointer to data buffer with gradient
|
||||
// wb = pitch - length of buffer
|
||||
// To scroll gradient, set virtual dimension wrapx, then shift offx
|
||||
void ScreenSegmGrad2(sSegm* segm, const void* data, int wb);
|
||||
|
||||
// set video segment to native 8-bit graphics (R3G3B2)
|
||||
// data = pointer to data buffer
|
||||
// wb = pitch - number of bytes between lines
|
||||
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
|
||||
void ScreenSegmGraph8(sSegm* segm, const void* data, int wb);
|
||||
|
||||
// generate 16-color palette translation table
|
||||
// trans = pointer to destination palette translation table (u16 trans[256])
|
||||
// pal = pointer to source palette of 16 colors (u8 pal[16])
|
||||
void GenPal16Trans(u16* trans, const u8* pal);
|
||||
|
||||
// set video segment to 4-bit palette graphics
|
||||
// data = pointer to data buffer
|
||||
// trans = pointer to 16-color palette translation table (generated with GenPal16Trans function)
|
||||
// wb = pitch - number of bytes between lines
|
||||
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
|
||||
void ScreenSegmGraph4(sSegm* segm, const void* data, const void* trans, int wb);
|
||||
|
||||
// generate palette 4 translation table for function ScreenSegmGraph2
|
||||
// trans = pointer to destination palette translation table (u32 trans[256])
|
||||
// pal = pointer to source palette of 4 colors (u8 pal[4])
|
||||
void GenPal4Trans(u32* trans, const u8* pal);
|
||||
|
||||
// set video segment to 2-bit palette graphics
|
||||
// data = pointer to data buffer
|
||||
// trans = pointer to 4-color palette translation table (generated with GenPal4Trans function)
|
||||
// wb = pitch - number of bytes between lines
|
||||
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
|
||||
void ScreenSegmGraph2(sSegm* segm, const void* data, const void* trans, int wb);
|
||||
|
||||
// set video segment to 1-bit palette graphics
|
||||
// data = pointer to data buffer
|
||||
// bg = background color
|
||||
// fg = foreground color
|
||||
// wb = pitch - number of bytes between lines
|
||||
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
|
||||
void ScreenSegmGraph1(sSegm* segm, const void* data, u8 bg, u8 fg, int wb);
|
||||
|
||||
// set video segment to 8-pixel mono text
|
||||
// data = pointer to text buffer
|
||||
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
|
||||
// fontheight = font height
|
||||
// bg = background color
|
||||
// fg = foreground color
|
||||
// wb = pitch - number of bytes between text lines
|
||||
void ScreenSegmMText(sSegm* segm, const void* data, const void* font, u16 fontheight, u8 bg, u8 fg, int wb);
|
||||
|
||||
// set video segment to 8-pixel attribute text
|
||||
// data = pointer to text buffer (character + 2x4 bit attributes)
|
||||
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
|
||||
// fontheight = font height
|
||||
// pal = pointer to palette of 16 colors
|
||||
// wb = pitch - number of bytes between text lines
|
||||
void ScreenSegmAText(sSegm* segm, const void* data, const void* font, u16 fontheight, const void* pal, int wb);
|
||||
|
||||
// set video segment to 8-pixel foreground color text
|
||||
// data = pointer to text buffer (character + foreground color)
|
||||
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
|
||||
// fontheight = font height
|
||||
// bg = background color
|
||||
// wb = pitch - number of bytes between text lines
|
||||
void ScreenSegmFText(sSegm* segm, const void* data, const void* font, u16 fontheight, u8 bg, int wb);
|
||||
|
||||
// set video segment to 8-pixel color text
|
||||
// data = pointer to text buffer (character + background color + foreground color)
|
||||
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
|
||||
// fontheight = font height
|
||||
// wb = pitch - number of bytes between text lines
|
||||
void ScreenSegmCText(sSegm* segm, const void* data, const void* font, u16 fontheight, int wb);
|
||||
|
||||
// set video segment to 8-pixel gradient color text
|
||||
// data = pointer to text buffer (character + foreground color)
|
||||
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
|
||||
// fontheight = font height
|
||||
// bg = background color
|
||||
// grad = pointer to array of gradient colors
|
||||
// wb = pitch - number of bytes between text lines
|
||||
void ScreenSegmGText(sSegm* segm, const void* data, const void* font, u8 fontheight, u8 bg, const void* grad, int wb);
|
||||
|
||||
// set video segment to 8-pixel double gradient color text
|
||||
// data = pointer to text buffer (character + foreground color)
|
||||
// font = pointer to 1-bit font of 256 characters of width 8 (total width of image 2048 pixels)
|
||||
// fontheight = font height
|
||||
// bg = background color
|
||||
// grad = pointer to array of gradient colors
|
||||
// wb = pitch - number of bytes between text lines
|
||||
void ScreenSegmDText(sSegm* segm, const void* data, const void* font, u8 fontheight, u8 bg, const void* grad, int wb);
|
||||
|
||||
// set video segment to tiles
|
||||
// data = pointer to tile map buffer (with tile indices)
|
||||
// tiles = pointer to 1 column of tiles, 1 pixel = 8 bits
|
||||
// w = tile width (must be multiple of 4)
|
||||
// h = tile height
|
||||
// wb = pitch - number of bytes between tile map rows
|
||||
void ScreenSegmTile(sSegm* segm, const void* data, const void* tiles, int w, int h, int wb);
|
||||
|
||||
// set video segment to alternate tiles
|
||||
// data = pointer to tile map buffer (with tile indices)
|
||||
// tiles = pointer to 1 row of tiles, 1 pixel = 8 bits
|
||||
// w = tile width (must be multiple of 4)
|
||||
// h = tile height
|
||||
// tilewb = tile width bytes (usually tile width * number of tiles)
|
||||
// wb = pitch - number of bytes between tile map rows
|
||||
void ScreenSegmTile2(sSegm* segm, const void* data, const void* tiles, int w, int h, int tilewb, int wb);
|
||||
|
||||
// set video segment to level graph GF_LEVEL
|
||||
// data = pointer to buffer with line samples 0..255
|
||||
// zero = Y zero level
|
||||
// bg = background color
|
||||
// fg = foreground color
|
||||
void ScreenSegmLevel(sSegm* segm, const void* data, u8 zero, u8 bg, u8 fg);
|
||||
|
||||
// set video segment to level gradient graph GF_LEVELGRAD
|
||||
// data = pointer to buffer with values 0..255 of 4-pixels in rows
|
||||
// sample1 = scanline sample < data
|
||||
// sample2 = scanline sample >= data
|
||||
void ScreenSegmLevelGrad(sSegm* segm, const void* data, const void* sample1, const void* sample2);
|
||||
|
||||
// set video segment to oscilloscope 1-pixel graph GF_OSCIL
|
||||
// data = pointer to buffer with line samples 0..255
|
||||
// bg = background color
|
||||
// fg = foreground color
|
||||
// pixh = height of pixels - 1
|
||||
void ScreenSegmOscil(sSegm* segm, const void* data, u8 bg, u8 fg, int pixh);
|
||||
|
||||
// set video segment to oscilloscope line graph GF_OSCLINE
|
||||
// data = pointer to buffer with line samples 0..255
|
||||
// bg = background color
|
||||
// fg = foreground color
|
||||
void ScreenSegmOscLine(sSegm* segm, const void* data, u8 bg, u8 fg);
|
||||
|
||||
// generate palette 4-color translation table for function ScreenSegmPlane2
|
||||
// trans = pointer to destination palette translation table (u32 trans[256])
|
||||
// pal = pointer to source palette of 4 colors (u8 pal[4])
|
||||
void GenPal4Plane(u32* trans, const u8* pal);
|
||||
|
||||
// set video segment to 4-color on 2-planes graphics
|
||||
// data = pointer to data buffer
|
||||
// plane = offset of 2nd graphics plane (in bytes), size of one graphics plane
|
||||
// trans = pointer to 4-color palette translation table (generated with GenPal4Plane function)
|
||||
// wb = pitch - number of bytes between lines
|
||||
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
|
||||
void ScreenSegmPlane2(sSegm* segm, const void* data, int plane, const void* trans, int wb);
|
||||
|
||||
// set video segment to 2x4 bit color attribute per 8x8 pixel sample graphics
|
||||
// data = pointer to data buffer with mono pixels
|
||||
// attr = pointer to color attributes
|
||||
// pal = pointer to 16-color palette table
|
||||
// wb = pitch - number of bytes between lines
|
||||
// To scroll image, set virtual dimension wrapx and wrapy, then shift offx and offy.
|
||||
void ScreenSegmAttrib8(sSegm* segm, const void* data, const void* attr, const u8* pal, int wb);
|
||||
|
||||
// set video segment to horizontal progress indicator GF_PROGRESS
|
||||
// data = pointer to buffer with values 0..255 of 4-pixels in rows
|
||||
// sample1 = scanline sample < data
|
||||
// sample2 = scanline sample >= data
|
||||
void ScreenSegmProgress(sSegm* segm, const void* data, const void* sample1, const void* sample2);
|
||||
|
||||
// set video segment to 8-bit graphics with 2D matrix transformation
|
||||
// data = pointer to image data (width and height of image must be power of 2)
|
||||
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
|
||||
// xbits = number of bits of image width (image width must be power of 2 and must be = pitch width bytes)
|
||||
// ybits = number of bits of image height (image height must be power of 2)
|
||||
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
|
||||
void ScreenSegmGraph8Mat(sSegm* segm, const void* data, const int* mat, u16 xbits, u16 ybits);
|
||||
|
||||
// set video segment to 8-bit graphics with perspective projection
|
||||
// data = pointer to image data (width and height of image must be power of 2)
|
||||
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
|
||||
// xbits = number of bits of image width (image width must be power of 2 and must be = pitch width bytes)
|
||||
// ybits = number of bits of image height (image height must be power of 2)
|
||||
// horiz = horizon offset
|
||||
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
|
||||
void ScreenSegmGraph8Persp(sSegm* segm, const void* data, const int* mat, u16 xbits, u16 ybits, u16 horiz);
|
||||
|
||||
// set video segment to tiles with perspective
|
||||
// map = pointer to tile map with tile indices (width and height must be power of 2)
|
||||
// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
|
||||
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
|
||||
// mapwbits = number of bits of tile map width
|
||||
// maphbits = number of bits of tile map height
|
||||
// tilebits = number of bits of tile width and height
|
||||
// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
|
||||
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
|
||||
void ScreenSegmTilePersp(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
|
||||
u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon);
|
||||
|
||||
// set video segment to tiles with perspective, 1.5 pixels
|
||||
// map = pointer to tile map with tile indices (width and height must be power of 2)
|
||||
// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
|
||||
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
|
||||
// mapwbits = number of bits of tile map width
|
||||
// maphbits = number of bits of tile map height
|
||||
// tilebits = number of bits of tile width and height
|
||||
// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
|
||||
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
|
||||
void ScreenSegmTilePersp15(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
|
||||
u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon);
|
||||
|
||||
// set video segment to tiles with perspective, double pixels
|
||||
// map = pointer to tile map with tile indices (width and height must be power of 2)
|
||||
// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
|
||||
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
|
||||
// mapwbits = number of bits of tile map width
|
||||
// maphbits = number of bits of tile map height
|
||||
// tilebits = number of bits of tile width and height
|
||||
// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
|
||||
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
|
||||
void ScreenSegmTilePersp2(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
|
||||
u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon);
|
||||
|
||||
// set video segment to tiles with perspective, triple pixels
|
||||
// map = pointer to tile map with tile indices (width and height must be power of 2)
|
||||
// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
|
||||
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
|
||||
// mapwbits = number of bits of tile map width
|
||||
// maphbits = number of bits of tile map height
|
||||
// tilebits = number of bits of tile width and height
|
||||
// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
|
||||
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
|
||||
void ScreenSegmTilePersp3(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
|
||||
u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon);
|
||||
|
||||
// set video segment to tiles with perspective, quadruple pixels
|
||||
// map = pointer to tile map with tile indices (width and height must be power of 2)
|
||||
// tiles = pointer to 1 column of square tiles, 1 pixel = 8 bits (width and height must be power of 2)
|
||||
// mat = pointer to array of 6 matrix integer parameters m11, m12...m23 (exported with ExportInt function)
|
||||
// mapwbits = number of bits of tile map width
|
||||
// maphbits = number of bits of tile map height
|
||||
// tilebits = number of bits of tile width and height
|
||||
// horizon = horizon offset/4 (0=do not use perspective projection, <0=vertical flip to display ceiling)
|
||||
// Use default settings of parameters: offx = 0, offy = 0, wrapx = segment width, wrapy = segment height
|
||||
void ScreenSegmTilePersp4(sSegm* segm, const u8* map, const u8* tiles, const int* mat,
|
||||
u8 mapwbits, u8 maphbits, u8 tilebits, s8 horizon);
|
||||
|
||||
#endif // _VGA_SCREEN_H
|
||||
|
|
@ -1,40 +1,17 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA videomodes
|
||||
//
|
||||
// file derived from the PicoVGA project
|
||||
// https://github.com/Panda381/PicoVGA
|
||||
// by Miroslav Nemecek
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#include "include.h"
|
||||
|
||||
sVmode Vmode; // videomode setup
|
||||
sVgaCfg Cfg; // required configuration
|
||||
sCanvas Canvas; // canvas of draw box
|
||||
|
||||
// default 16-color palettes (EGA colors)
|
||||
// - do not set "const", to stay in faster RAM
|
||||
u8 DefPal16[16] = {
|
||||
CGACOL_0, // 0 // 0x000000 black
|
||||
CGACOL_1, // 2 // 0x0000AA dark blue
|
||||
CGACOL_2, // 20 // 0x00B600 dark green
|
||||
CGACOL_3, // 22 // 0x00B6AA dark cyan
|
||||
CGACOL_4, // 160 // 0xB60000 dark red
|
||||
CGACOL_5, // 162 // 0xB600AA dark magenta
|
||||
CGACOL_6, // 168 // 0xB64900 brown
|
||||
CGACOL_7, // 182 // 0xB6B6AA light gray
|
||||
|
||||
CGACOL_8, // 73 // 0x494955 dark gray
|
||||
CGACOL_9, // 75 // 0x4949FF light blue
|
||||
CGACOL_10, // 93 // 0x49FF55 light green
|
||||
CGACOL_11, // 95 // 0x49FFFF light cyan
|
||||
CGACOL_12, // 233 // 0xFF4955 light red
|
||||
CGACOL_13, // 235 // 0xFF49FF light magenta
|
||||
CGACOL_14, // 253 // 0xFFFF55 yellow
|
||||
CGACOL_15, // 255 // 0xFFFFFF white
|
||||
};
|
||||
|
||||
// 16-color palette translation table
|
||||
u16 Pal16Trans[256];
|
||||
|
||||
/*
|
||||
http://martin.hinner.info/vga/pal.html
|
||||
|
|
@ -45,211 +22,9 @@ time 0:
|
|||
- line 3..35: (33) dark
|
||||
- line 36..515: (480) image lines 0..479
|
||||
- line 516..525: (10) dark
|
||||
|
||||
PAL system (625 lines total):
|
||||
time 0:
|
||||
- line 1, 2: (2) vertical sync + vertical sync
|
||||
- line 3: (1) vertical sync + half sync
|
||||
- line 4, 5: (2) half sync + half sync
|
||||
- line 6..23: (18) dark
|
||||
- line 24..46: (23) dark image
|
||||
time 46:
|
||||
- line 47..286: (240) image lines odd 1, 3, 5 ... 479
|
||||
- line 287..310: (24) dark image
|
||||
- line 311..312: (2) half sync + half sync
|
||||
- line 313: (1) half sync + vertical sync
|
||||
vsync time 313 (vsync time 312.5):
|
||||
- line 314..315: (2) vertical sync + vertical sync
|
||||
- line 316..317: (2) half sync + half sync
|
||||
- line 318..335: (18) dark
|
||||
- line 336..358: (23) dark image
|
||||
time 358 (45.5 from last vsync)
|
||||
- line 359..598: (240) image lines even 0, 2, ... 478
|
||||
- line 599..622: (24) dark image
|
||||
- line 623..625: (3) half sync + half sync
|
||||
time 625:
|
||||
|
||||
NTSC system (525 lines total):
|
||||
time 0, even field:
|
||||
- line 1..3: (3) vertical sync + vertical sync (6 serration pulses: 27.3 us low, 4.5 us high)
|
||||
- line 4..6: (3) half sync + half sync (6 equalizing pulses: 2.3 us low, 29.5 us high)
|
||||
- line 7..16: (10) dark (blanked video: 4.7 us low, 58.9 us high)
|
||||
- line 17,18: (2) dark image
|
||||
time 18:
|
||||
- line 19..258: (240) image lines even 0, 2, ... 478
|
||||
- line 259: (1) dark image
|
||||
- line 260..262: (3) half sync + half sync (7 equalizing pulses)
|
||||
- line 263: (1) half sync + vertical sync (6 serration pulses)
|
||||
time 263 (vsync time 262.5):
|
||||
- line 264,265: (2) vertical sync + vertical sync
|
||||
- line 266: (1) vertical sync + half sync (5 equalizing pulses)
|
||||
- line 267..268: (2) half sync + half sync
|
||||
- line 269..279: (11) dark
|
||||
- line 280..281: (2) dark image
|
||||
time 281 (18.5 from last vsync)
|
||||
- line 282..521: (240) image lines odd 1, 3, 5 ... 479
|
||||
- line 522: (1) dark image
|
||||
- line 523..525: (3) half sync + half sync
|
||||
time 525:
|
||||
|
||||
*/
|
||||
|
||||
// === TV videomodes
|
||||
|
||||
// TV PAL interlaced 5:4 720x576 (4:3 768x576, 16:9 1024x576)
|
||||
const sVideo VideoPAL = {
|
||||
// horizontal (horizontal frequency 15625 Hz, effective sync pulses 16000 Hz)
|
||||
.htot= 64.00000f, // total scanline in [us]
|
||||
.hfront= 1.65000f, // H front porch (after image, before HSYNC) in [us]
|
||||
.hsync= 4.70000f, // H sync pulse in [us]
|
||||
.hback= 5.70000f, // H back porch (after HSYNC, before image) in [us]
|
||||
.hfull= 47.36000f, // H full visible in [us] (formally should be 51.95 us)
|
||||
|
||||
// vertical (vertical frequency 50 Hz)
|
||||
.vtot=625, // total scanlines (both subframes)
|
||||
.vmax=576, // maximal height
|
||||
|
||||
// subframe 1
|
||||
.vsync1=5, // V sync (half-)pulses on subframe 1
|
||||
.vpost1=5, // V sync post half-pulses on subframe 1
|
||||
.vback1=18+23, // V back porch (after VSYNC, before image) on subframe 1
|
||||
.vact1=240, // active visible scanlines, subframe 1 (formally should be 288, 576 total)
|
||||
.vfront1=24, // V front porch (after image, before VSYNC) on subframe 1
|
||||
.vpre1=5, // V sync pre half-pulses on subframe 1
|
||||
|
||||
// subframe 2 (ignored if not interlaced)
|
||||
.vsync2=5, // V sync half-pulses on subframe 2
|
||||
.vpost2=4, // V sync post half-pulses on subframe 2
|
||||
.vback2=18+23, // V back porch (after VSYNC, before image) on subframe 2
|
||||
.vact2=240, // active visible scanlines, subframe 2 (formally should be 288, 576 total)
|
||||
.vfront2=24, // V front porch (after image, before VSYNC) on subframe 2
|
||||
.vpre2=6, // V sync pre half-pulses on subframe 2
|
||||
|
||||
// name
|
||||
.name = "PAL ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
|
||||
|
||||
// flags
|
||||
.inter=True, // interlaced (use subframes)
|
||||
.psync=False, // positive synchronization
|
||||
.odd=True, // first sub-frame is odd lines 1, 3, 5,... (PAL)
|
||||
};
|
||||
|
||||
// TV PAL progressive 5:4 360x288 (4:3 384x288, 16:9 512x288)
|
||||
const sVideo VideoPALp = {
|
||||
// horizontal (horizontal frequency 15625 Hz)
|
||||
.htot= 64.00000f, // total scanline in [us]
|
||||
.hfront= 1.65000f, // H front porch (after image, before HSYNC) in [us]
|
||||
.hsync= 4.70000f, // H sync pulse in [us]
|
||||
.hback= 5.70000f, // H back porch (after HSYNC, before image) in [us]
|
||||
.hfull= 47.36000f, // H full visible in [us] (formally should be 51.95 us)
|
||||
|
||||
// vertical (vertical frequency 50 Hz)
|
||||
.vtot=312, // total scanlines (both subframes)
|
||||
.vmax=288, // maximal height
|
||||
|
||||
// subframe 1
|
||||
.vsync1=2, // V sync (half-)pulses on subframe 1
|
||||
.vpost1=0, // V sync post half-pulses on subframe 1
|
||||
.vback1=18+23+2, // V back porch (after VSYNC, before image) on subframe 1
|
||||
.vact1=240, // active visible scanlines, subframe 1 (formally should be 288, 576 total)
|
||||
.vfront1=24+3, // V front porch (after image, before VSYNC) on subframe 1
|
||||
.vpre1=0, // V sync pre half-pulses on subframe 1
|
||||
|
||||
// subframe 2 (ignored if not interlaced)
|
||||
.vsync2=0, // V sync half-pulses on subframe 2
|
||||
.vpost2=0, // V sync post half-pulses on subframe 2
|
||||
.vback2=0, // V back porch (after VSYNC, before image) on subframe 2
|
||||
.vact2=0, // active visible scanlines, subframe 2 (formally should be 288, 576 total)
|
||||
.vfront2=0, // V front porch (after image, before VSYNC) on subframe 2
|
||||
.vpre2=0, // V sync pre half-pulses on subframe 2
|
||||
|
||||
// name
|
||||
.name = "PALp ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
|
||||
|
||||
// flags
|
||||
.inter=False, // interlaced (use subframes)
|
||||
.psync=False, // positive synchronization
|
||||
.odd=True, // first sub-frame is odd lines 1, 3, 5,... (PAL)
|
||||
};
|
||||
|
||||
// TV NTSC interlaced 4:3 640x480 (5:4 600x480, 16:9 848x480)
|
||||
// serration pulses (half vsync): 27.3 us low, 4.5 us high
|
||||
// equalizing pulses (half hsync): 2.3 us low, 29.5 us high
|
||||
// blanked video (hsync pulses): 4.7 us low, 58.9 us high
|
||||
const sVideo VideoNTSC = {
|
||||
// horizontal (horizontal frequency 15734 Hz, effective sync pulses 16274 Hz)
|
||||
.htot= 63.55582f, // total scanline in [us]
|
||||
.hfront= 1.50000f, // H front porch (after image, before HSYNC) in [us]
|
||||
.hsync= 4.70000f, // H sync pulse in [us]
|
||||
.hback= 4.50000f, // H back porch (after HSYNC, before image) in [us]
|
||||
.hfull= 47.03130f, // H full visible in [us]
|
||||
|
||||
// vertical
|
||||
.vtot=525, // total scanlines (both subframes)
|
||||
.vmax=480, // maximal height
|
||||
|
||||
// subframe 1
|
||||
.vsync1=6, // V sync (half-)pulses on subframe 1
|
||||
.vpost1=6, // V sync post half-pulses on subframe 1
|
||||
.vback1=10+2, // V back porch (after VSYNC, before image) on subframe 1
|
||||
.vact1=240, // active visible scanlines, subframe 1
|
||||
.vfront1=1, // V front porch (after image, before VSYNC) on subframe 1
|
||||
.vpre1=7, // V sync pre half-pulses on subframe 1
|
||||
|
||||
// subframe 2 (ignored if not interlaced)
|
||||
.vsync2=6, // V sync half-pulses on subframe 2
|
||||
.vpost2=5, // V sync post half-pulses on subframe 2
|
||||
.vback2=11+2, // V back porch (after VSYNC, before image) on subframe 2
|
||||
.vact2=240, // active visible scanlines, subframe 2
|
||||
.vfront2=1, // V front porch (after image, before VSYNC) on subframe 2
|
||||
.vpre2=6, // V sync pre half-pulses on subframe 2
|
||||
|
||||
// name
|
||||
.name = "NTSC ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
|
||||
|
||||
// flags
|
||||
.inter=True, // interlaced (use subframes)
|
||||
.psync=False, // positive synchronization
|
||||
.odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL)
|
||||
};
|
||||
|
||||
// TV NTSC progressive 4:3 320x240 (5:4 300x240, 16:9 424x240)
|
||||
const sVideo VideoNTSCp = {
|
||||
// horizontal (horizontal frequency 15734 Hz)
|
||||
.htot= 63.55582f, // total scanline in [us]
|
||||
.hfront= 1.50000f, // H front porch (after image, before HSYNC) in [us]
|
||||
.hsync= 4.70000f, // H sync pulse in [us]
|
||||
.hback= 4.50000f, // H back porch (after HSYNC, before image) in [us]
|
||||
.hfull= 47.03130f, // H full visible in [us]
|
||||
|
||||
// vertical
|
||||
.vtot=262, // total scanlines (both subframes)
|
||||
.vmax=240, // maximal height
|
||||
|
||||
// subframe 1
|
||||
.vsync1=3, // V sync (half-)pulses on subframe 1
|
||||
.vpost1=0, // V sync post half-pulses on subframe 1
|
||||
.vback1=10+2+3, // V back porch (after VSYNC, before image) on subframe 1
|
||||
.vact1=240, // active visible scanlines, subframe 1
|
||||
.vfront1=1+3, // V front porch (after image, before VSYNC) on subframe 1
|
||||
.vpre1=0, // V sync pre half-pulses on subframe 1
|
||||
|
||||
// subframe 2 (ignored if not interlaced)
|
||||
.vsync2=0, // V sync half-pulses on subframe 2
|
||||
.vpost2=0, // V sync post half-pulses on subframe 2
|
||||
.vback2=0, // V back porch (after VSYNC, before image) on subframe 2
|
||||
.vact2=0, // active visible scanlines, subframe 2
|
||||
.vfront2=0, // V front porch (after image, before VSYNC) on subframe 2
|
||||
.vpre2=6, // V sync pre half-pulses on subframe 2
|
||||
|
||||
// name
|
||||
.name = "NTSCp", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
|
||||
|
||||
// flags
|
||||
.inter=False, // interlaced (use subframes)
|
||||
.psync=False, // positive synchronization
|
||||
.odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL)
|
||||
};
|
||||
|
||||
// === Monitor videomodes
|
||||
|
||||
|
|
@ -266,29 +41,16 @@ const sVideo VideoEGA = {
|
|||
.vtot=449, // total scanlines (both subframes)
|
||||
.vmax=400, // maximal height
|
||||
|
||||
// subframe 1
|
||||
.vsync1=2, // V sync (half-)pulses on subframe 1
|
||||
.vpost1=0, // V sync post half-pulses on subframe 1
|
||||
.vback1=35, // V back porch (after VSYNC, before image) on subframe 1
|
||||
.vact1=400, // active visible scanlines, subframe 1
|
||||
.vfront1=12, // V front porch (after image, before VSYNC) on subframe 1
|
||||
.vpre1=0, // V sync pre half-pulses on subframe 1
|
||||
|
||||
// subframe 2 (ignored if not interlaced)
|
||||
.vsync2=0, // V sync half-pulses on subframe 2
|
||||
.vpost2=0, // V sync post half-pulses on subframe 2
|
||||
.vback2=0, // V back porch (after VSYNC, before image) on subframe 2
|
||||
.vact2=0, // active visible scanlines, subframe 2
|
||||
.vfront2=0, // V front porch (after image, before VSYNC) on subframe 2
|
||||
.vpre2=0, // V sync pre half-pulses on subframe 2
|
||||
|
||||
// name
|
||||
.name = "EGA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
|
||||
// frame
|
||||
.vsync=2, // V sync (half-)pulses
|
||||
.vpost=0, // V sync post half-pulses
|
||||
.vback=35, // V back porch (after VSYNC, before image)
|
||||
.vact=400, // active visible scanlines
|
||||
.vfront=12, // V front porch (after image, before VSYNC)
|
||||
.vpre=0, // V sync pre half-pulses
|
||||
|
||||
// flags
|
||||
.inter=False, // interlaced (use subframes)
|
||||
.psync=False, // positive synchronization
|
||||
.odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL)
|
||||
};
|
||||
|
||||
// VGA 4:3 640x480 (16:9 848x480), vert. 60 Hz, hor. 31.4685 kHz, pixel clock 25.175 MHz
|
||||
|
|
@ -304,182 +66,38 @@ const sVideo VideoVGA = {
|
|||
.vtot=525, // total scanlines (both subframes)
|
||||
.vmax=480, // maximal height
|
||||
|
||||
// subframe 1
|
||||
.vsync1=2, // V sync (half-)pulses on subframe 1
|
||||
.vpost1=0, // V sync post half-pulses on subframe 1
|
||||
.vback1=33, // V back porch (after VSYNC, before image) on subframe 1
|
||||
.vact1=480, // active visible scanlines, subframe 1
|
||||
.vfront1=10, // V front porch (after image, before VSYNC) on subframe 1
|
||||
.vpre1=0, // V sync pre half-pulses on subframe 1
|
||||
|
||||
// subframe 2 (ignored if not interlaced)
|
||||
.vsync2=0, // V sync half-pulses on subframe 2
|
||||
.vpost2=0, // V sync post half-pulses on subframe 2
|
||||
.vback2=0, // V back porch (after VSYNC, before image) on subframe 2
|
||||
.vact2=0, // active visible scanlines, subframe 2
|
||||
.vfront2=0, // V front porch (after image, before VSYNC) on subframe 2
|
||||
.vpre2=0, // V sync pre half-pulses on subframe 2
|
||||
|
||||
// name
|
||||
.name = "VGA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
|
||||
// frame
|
||||
.vsync=2, // V sync (half-)pulses
|
||||
.vpost=0, // V sync post half-pulses
|
||||
.vback=33, // V back porch (after VSYNC, before image)
|
||||
.vact=480, // active visible scanlines
|
||||
.vfront=10, // V front porch (after image, before VSYNC)
|
||||
.vpre=0, // V sync pre half-pulses
|
||||
|
||||
// flags
|
||||
.inter=False, // interlaced (use subframes)
|
||||
.psync=False, // positive synchronization
|
||||
.odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL)
|
||||
};
|
||||
|
||||
// SVGA 4:3 800x600 (16:9 1064x600), vert. 60 Hz, hor. 37.897 kHz, pixel clock 40 MHz
|
||||
const sVideo VideoSVGA = {
|
||||
// horizontal
|
||||
.htot= 26.40000f, // total scanline in [us] (1056 pixels)
|
||||
.hfront= 1.00000f, // H front porch (after image, before HSYNC) in [us] (40 pixels)
|
||||
.hsync= 3.20000f, // H sync pulse in [us] (128 pixels)
|
||||
.hback= 2.20000f, // H back porch (after HSYNC, before image) in [us] (88 pixels)
|
||||
.hfull= 20.00000f, // H full visible in [us] (800 pixels)
|
||||
|
||||
// vertical
|
||||
.vtot=628, // total scanlines (both subframes)
|
||||
.vmax=600, // maximal height
|
||||
|
||||
// subframe 1
|
||||
.vsync1=4, // V sync (half-)pulses on subframe 1
|
||||
.vpost1=0, // V sync post half-pulses on subframe 1
|
||||
.vback1=23, // V back porch (after VSYNC, before image) on subframe 1
|
||||
.vact1=600, // active visible scanlines, subframe 1
|
||||
.vfront1=1, // V front porch (after image, before VSYNC) on subframe 1
|
||||
.vpre1=0, // V sync pre half-pulses on subframe 1
|
||||
|
||||
// subframe 2 (ignored if not interlaced)
|
||||
.vsync2=0, // V sync half-pulses on subframe 2
|
||||
.vpost2=0, // V sync post half-pulses on subframe 2
|
||||
.vback2=0, // V back porch (after VSYNC, before image) on subframe 2
|
||||
.vact2=0, // active visible scanlines, subframe 2
|
||||
.vfront2=0, // V front porch (after image, before VSYNC) on subframe 2
|
||||
.vpre2=0, // V sync pre half-pulses on subframe 2
|
||||
|
||||
// name
|
||||
.name = "SVGA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
|
||||
|
||||
// flags
|
||||
.inter=False, // interlaced (use subframes)
|
||||
.psync=True, // positive synchronization
|
||||
.odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL)
|
||||
// timings
|
||||
const sVideo* VideoResTab[DEV_MAX*RES_MAX] =
|
||||
{
|
||||
// DEV_VGA
|
||||
&VideoEGA, // RES_ZX = 0, // 256x192
|
||||
&VideoVGA, // RES_CGA, // 320x200
|
||||
&VideoVGA, // RES_QVGA, // 320x240
|
||||
&VideoEGA, // RES_EGA, // 528x400
|
||||
&VideoVGA, // RES_VGA, // 640x480
|
||||
};
|
||||
|
||||
// XGA 4:3 1024x768 (16:9 1360x768), vert. 60 Hz, hor. 48.36310 kHz, pixel clock 65 MHz
|
||||
const sVideo VideoXGA = {
|
||||
// horizontal
|
||||
.htot= 20.67692f, // total scanline in [us] (1344 pixels)
|
||||
.hfront= 0.36923f, // H front porch (after image, before HSYNC) in [us] (24 pixels)
|
||||
.hsync= 2.09231f, // H sync pulse in [us] (136 pixels)
|
||||
.hback= 2.46154f, // H back porch (after HSYNC, before image) in [us] (160 pixels)
|
||||
.hfull= 15.75385f, // H full visible in [us] (1024 pixels)
|
||||
|
||||
// vertical
|
||||
.vtot=806, // total scanlines (both subframes)
|
||||
.vmax=768, // maximal height
|
||||
|
||||
// subframe 1
|
||||
.vsync1=6, // V sync (half-)pulses on subframe 1
|
||||
.vpost1=0, // V sync post half-pulses on subframe 1
|
||||
.vback1=29, // V back porch (after VSYNC, before image) on subframe 1
|
||||
.vact1=768, // active visible scanlines, subframe 1
|
||||
.vfront1=3, // V front porch (after image, before VSYNC) on subframe 1
|
||||
.vpre1=0, // V sync pre half-pulses on subframe 1
|
||||
|
||||
// subframe 2 (ignored if not interlaced)
|
||||
.vsync2=0, // V sync half-pulses on subframe 2
|
||||
.vpost2=0, // V sync post half-pulses on subframe 2
|
||||
.vback2=0, // V back porch (after VSYNC, before image) on subframe 2
|
||||
.vact2=0, // active visible scanlines, subframe 2
|
||||
.vfront2=0, // V front porch (after image, before VSYNC) on subframe 2
|
||||
.vpre2=0, // V sync pre half-pulses on subframe 2
|
||||
|
||||
// name
|
||||
.name = "XGA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
|
||||
|
||||
// flags
|
||||
.inter=False, // interlaced (use subframes)
|
||||
.psync=False, // positive synchronization
|
||||
.odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL)
|
||||
};
|
||||
|
||||
// VESA 4:3 1152x864, vert. 60 Hz, hor. 53.697 kHz, pixel clock 81.62 MHz
|
||||
const sVideo VideoVESA = {
|
||||
// horizontal
|
||||
.htot= 18.62289f, // total scanline in [us] (1520 pixels)
|
||||
.hfront= 0.78412f, // H front porch (after image, before HSYNC) in [us] (64 pixels)
|
||||
.hsync= 1.47023f, // H sync pulse in [us] (120 pixels)
|
||||
.hback= 2.25435f, // H back porch (after HSYNC, before image) in [us] (184 pixels)
|
||||
.hfull= 14.11419f, // H full visible in [us] (1152 pixels)
|
||||
|
||||
// vertical
|
||||
.vtot=895, // total scanlines (both subframes)
|
||||
.vmax=864, // maximal height
|
||||
|
||||
// subframe 1
|
||||
.vsync1=3, // V sync (half-)pulses on subframe 1
|
||||
.vpost1=0, // V sync post half-pulses on subframe 1
|
||||
.vback1=27, // V back porch (after VSYNC, before image) on subframe 1
|
||||
.vact1=864, // active visible scanlines, subframe 1
|
||||
.vfront1=1, // V front porch (after image, before VSYNC) on subframe 1
|
||||
.vpre1=0, // V sync pre half-pulses on subframe 1
|
||||
|
||||
// subframe 2 (ignored if not interlaced)
|
||||
.vsync2=0, // V sync half-pulses on subframe 2
|
||||
.vpost2=0, // V sync post half-pulses on subframe 2
|
||||
.vback2=0, // V back porch (after VSYNC, before image) on subframe 2
|
||||
.vact2=0, // active visible scanlines, subframe 2
|
||||
.vfront2=0, // V front porch (after image, before VSYNC) on subframe 2
|
||||
.vpre2=0, // V sync pre half-pulses on subframe 2
|
||||
|
||||
// name
|
||||
.name = "VESA ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
|
||||
|
||||
// flags
|
||||
.inter=False, // interlaced (use subframes)
|
||||
.psync=True, // positive synchronization
|
||||
.odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL)
|
||||
};
|
||||
|
||||
// HD 4:3 1280x960, vert. 53 Hz, hor. 51.858 kHz, pixel clock 102.1 MHz
|
||||
#define HD_SLOW 1.15f
|
||||
const sVideo VideoHD = {
|
||||
// horizontal
|
||||
.htot= 16.76787f*HD_SLOW, // total scanline in [us] (1712 pixels)
|
||||
.hfront= 0.78355f*HD_SLOW, // H front porch (after image, before HSYNC) in [us] (80 pixels)
|
||||
.hsync= 1.33203f*HD_SLOW, // H sync pulse in [us] (136 pixels)
|
||||
.hback= 2.11557f*HD_SLOW, // H back porch (after HSYNC, before image) in [us] (216 pixels)
|
||||
.hfull= 12.53673f*HD_SLOW, // H full visible in [us] (1280 pixels)
|
||||
|
||||
// vertical
|
||||
.vtot=994-10, // total scanlines (both subframes)
|
||||
.vmax=960, // maximal height
|
||||
|
||||
// subframe 1
|
||||
.vsync1=3, // V sync (half-)pulses on subframe 1
|
||||
.vpost1=0, // V sync post half-pulses on subframe 1
|
||||
.vback1=30-10, // V back porch (after VSYNC, before image) on subframe 1
|
||||
.vact1=960, // active visible scanlines, subframe 1
|
||||
.vfront1=1, // V front porch (after image, before VSYNC) on subframe 1
|
||||
.vpre1=0, // V sync pre half-pulses on subframe 1
|
||||
|
||||
// subframe 2 (ignored if not interlaced)
|
||||
.vsync2=0, // V sync half-pulses on subframe 2
|
||||
.vpost2=0, // V sync post half-pulses on subframe 2
|
||||
.vback2=0, // V back porch (after VSYNC, before image) on subframe 2
|
||||
.vact2=0, // active visible scanlines, subframe 2
|
||||
.vfront2=0, // V front porch (after image, before VSYNC) on subframe 2
|
||||
.vpre2=0, // V sync pre half-pulses on subframe 2
|
||||
|
||||
// name
|
||||
.name = "HD ", // video timing name (VIDEO_NAME_LEN characters + terminating 0)
|
||||
|
||||
// flags
|
||||
.inter=False, // interlaced (use subframes)
|
||||
.psync=False, // positive synchronization
|
||||
.odd=False, // first sub-frame is odd lines 1, 3, 5,... (PAL)
|
||||
// required resolution width x height
|
||||
const u16 VideoResReq[RES_MAX*2] =
|
||||
{
|
||||
256, 192, // RES_ZX = 0, // 256x192
|
||||
320, 200, // RES_CGA, // 320x200
|
||||
320, 240, // RES_QVGA, // 320x240
|
||||
512, 400, // RES_EGA, // 512x400
|
||||
640, 480, // RES_VGA, // 640x480
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -582,59 +200,23 @@ void VgaCfgDef(sVgaCfg* cfg)
|
|||
cfg->height = 480; // height in lines
|
||||
cfg->wfull = 0; // width of full screen, corresponding to 'hfull' time (0=use 'width' parameter)
|
||||
cfg->video = &VideoVGA; // used video timings
|
||||
cfg->freq = 250000; //120000; // required minimal system frequency in kHz (real frequency can be higher)
|
||||
uint freq = clock_get_hz(clk_sys)/1000;
|
||||
cfg->freq = freq; // required minimal system frequency in kHz (real frequency can be higher)
|
||||
cfg->fmax = 270000; // maximal system frequency in kHz (limit resolution if needed)
|
||||
cfg->mode[0] = LAYERMODE_BASE; // modes of overlapped layers 0..3 LAYERMODE_* (LAYERMODE_BASE = layer is off)
|
||||
cfg->mode[1] = LAYERMODE_BASE; // - mode of layer 0 is ignored (always use LAYERMODE_BASE)
|
||||
cfg->mode[2] = LAYERMODE_BASE; // - all overlapped layers must use same layer program
|
||||
cfg->mode[3] = LAYERMODE_BASE;
|
||||
cfg->dbly = False; // double in Y direction
|
||||
cfg->lockfreq = False; // lock required frequency, do not change it
|
||||
}
|
||||
|
||||
// debug print videomode setup
|
||||
void VgaPrintCfg(const sVmode* vmode)
|
||||
{
|
||||
printf("width=%u height=%u wfull=%u wmax=%u\n", vmode->width, vmode->height, vmode->wfull, vmode->wmax);
|
||||
printf("freq=%u vco=%u fbdiv=%u pd1=%u pd2=%u\n", vmode->freq, vmode->vco, vmode->fbdiv, vmode->pd1, vmode->pd2);
|
||||
printf("div=%u cpp=%u prog=%u mode=%u %u %u %u\n", vmode->div, vmode->cpp, vmode->prog, vmode->mode[0], vmode->mode[1], vmode->mode[2], vmode->mode[3]);
|
||||
printf("htot=%u hfront=%u hsync=%u hback=%u\n", vmode->htot, vmode->hfront, vmode->hsync, vmode->hback);
|
||||
printf("vtot=%u vmax=%u\n", vmode->vtot, vmode->vmax);
|
||||
printf("vsync1=%u vpost1=%u vback1=%u vact1=%u vfront1=%u vpre1=%u vfirst1=%u\n", vmode->vsync1, vmode->vpost1,
|
||||
vmode->vback1, vmode->vact1, vmode->vfront1, vmode->vpre1, vmode->vfirst1);
|
||||
printf("vsync2=%u vpost2=%u vback2=%u vact2=%u vfront2=%u vpre2=%u vfirst2=%u\n", vmode->vsync2, vmode->vpost2,
|
||||
vmode->vback2, vmode->vact2, vmode->vfront2, vmode->vpre2, vmode->vfirst2);
|
||||
printf("lockfreq=%u dbly=%u inter=%u psync=%u odd=%u\n", vmode->lockfreq, vmode->dbly, vmode->inter, vmode->psync, vmode->odd);
|
||||
}
|
||||
|
||||
|
||||
// calculate videomode setup
|
||||
// cfg ... required configuration
|
||||
// vmode ... destination videomode setup for driver
|
||||
void VgaCfg(const sVgaCfg* cfg, sVmode* vmode)
|
||||
{
|
||||
int i;
|
||||
|
||||
// prepare layer program, copy layer modes
|
||||
u8 prog = LAYERMODE_BASE;
|
||||
vmode->mode[0] = prog;
|
||||
for (i = 1; i < LAYERS; i++)
|
||||
{
|
||||
if (cfg->mode[i] != LAYERMODE_BASE) prog = LayerMode[cfg->mode[i]].prog;
|
||||
vmode->mode[i] = cfg->mode[i];
|
||||
}
|
||||
vmode->prog = prog;
|
||||
|
||||
// prepare minimal and maximal clocks per pixel
|
||||
int mincpp = LayerMode[LAYERMODE_BASE].mincpp;
|
||||
int maxcpp = LayerMode[LAYERMODE_BASE].maxcpp;
|
||||
int cpp;
|
||||
for (i = 1; i < LAYERS; i++)
|
||||
{
|
||||
cpp = LayerMode[cfg->mode[i]].mincpp;
|
||||
if (cpp > mincpp) mincpp = cpp;
|
||||
cpp = LayerMode[cfg->mode[i]].maxcpp;
|
||||
if (cpp < maxcpp) maxcpp = cpp;
|
||||
}
|
||||
int mincpp = 2;
|
||||
int maxcpp = 17;
|
||||
|
||||
// prepare full width
|
||||
int w = cfg->width; // required width
|
||||
|
|
@ -649,7 +231,7 @@ void VgaCfg(const sVgaCfg* cfg, sVmode* vmode)
|
|||
|
||||
// calculate cpp from required frequency (rounded down), limit minimal cpp
|
||||
u32 freq = cfg->freq;
|
||||
cpp = (int)(freq*hfull/1000/wfull + 0.1f);
|
||||
int cpp = (int)(freq*hfull/1000/wfull + 0.1f);
|
||||
if (cpp < mincpp) cpp = mincpp;
|
||||
|
||||
// recalculate frequency if not locked
|
||||
|
|
@ -732,14 +314,6 @@ void VgaCfg(const sVgaCfg* cfg, sVmode* vmode)
|
|||
}
|
||||
|
||||
htot = hfront + hsync + hback + hwidth; // total state machine clocks per line
|
||||
|
||||
// interlaced htot must be even (so that the line can be split into two half-sync pulses)
|
||||
if (v->inter && ((htot & 1) != 0))
|
||||
{
|
||||
htot--;
|
||||
hfront++;
|
||||
}
|
||||
|
||||
vmode->htot = (u16)htot; // total state machine clocks per line
|
||||
vmode->hfront = (u16)hfront; // H front porch in state machine clocks (min. 2)
|
||||
vmode->hsync = (u16)hsync; // H sync pulse in state machine clocks (min. 4)
|
||||
|
|
@ -752,21 +326,7 @@ void VgaCfg(const sVgaCfg* cfg, sVmode* vmode)
|
|||
if (h > v->vmax) h = v->vmax; // limit height
|
||||
if (cfg->dbly) h &= ~1; // must be even number if double lines
|
||||
|
||||
int vact1 = h; // active lines in progress mode
|
||||
int vact2 = 0;
|
||||
if (v->inter) // interlaced
|
||||
{
|
||||
if (v->odd) // first frame is odd lines
|
||||
{
|
||||
vact1 = h/2;
|
||||
vact2 = (h+1)/2; // if even lines, even frame will have more lines
|
||||
}
|
||||
else
|
||||
{
|
||||
vact1 = (h+1)/2; // if even lines, even frame will have more lines
|
||||
vact2 = h/2;
|
||||
}
|
||||
}
|
||||
int vact = h; // active lines in progress mode
|
||||
|
||||
if (cfg->dbly) h /= 2; // return double lines to single lines
|
||||
vmode->height = h;
|
||||
|
|
@ -774,125 +334,37 @@ void VgaCfg(const sVgaCfg* cfg, sVmode* vmode)
|
|||
// vertical timings
|
||||
vmode->vtot = v->vtot; // total scanlines
|
||||
|
||||
vmode->vact1 = vact1; // active scanlines of 1st subframe
|
||||
int dh = vact1 - v->vact1; // difference
|
||||
vmode->vsync1 = v->vsync1; // V sync (half-)pulses on subframe 1
|
||||
vmode->vpost1 = v->vpost1; // V sync post (half-)pulses on subframe 1
|
||||
vmode->vback1 = v->vback1 - dh/2; // V back porch (after VSYNC, before image) on subframe 1
|
||||
vmode->vfront1 = v->vfront1 - ((dh < 0) ? (dh-1)/2 : (dh+1)/2); // V front porch (after image, before VSYNC) on subframe 1
|
||||
vmode->vpre1 = v->vpre1; // V sync pre (half-)pulses on subframe 1
|
||||
|
||||
vmode->vact2 = vact2; // active scanlines of 2nd subframe
|
||||
dh = vact2 - v->vact2; // difference
|
||||
vmode->vsync2 = v->vsync2; // V sync half-pulses on subframe 2
|
||||
vmode->vpost2 = v->vpost2; // V sync post half-pulses on subframe 2
|
||||
vmode->vback2 = v->vback2 - dh/2; // V back porch (after VSYNC, before image) on subframe 2
|
||||
vmode->vfront2 = v->vfront2 - ((dh < 0) ? (dh-1)/2 : (dh+1)/2); // V front porch (after image, before VSYNC) on subframe 2
|
||||
vmode->vpre2 = v->vpre2; // V sync pre half-pulses on subframe 2
|
||||
vmode->vact = vact; // active scanlines
|
||||
int dh = vact - v->vact; // difference
|
||||
vmode->vsync = v->vsync; // V sync (half-)pulses
|
||||
vmode->vpost = v->vpost; // V sync post (half-)pulses
|
||||
vmode->vback = v->vback - dh/2; // V back porch (after VSYNC, before image)
|
||||
vmode->vfront = v->vfront - ((dh < 0) ? (dh-1)/2 : (dh+1)/2); // V front porch (after image, before VSYNC)
|
||||
vmode->vpre = v->vpre; // V sync pre (half-)pulses
|
||||
|
||||
// frequency
|
||||
vmode->hfreq = vmode->freq * 1000.0f / vmode->div / vmode->htot;
|
||||
vmode->vfreq = vmode->hfreq / vmode->vtot;
|
||||
|
||||
// name
|
||||
vmode->name = v->name; // video timing name
|
||||
|
||||
// flags
|
||||
vmode->lockfreq = cfg->lockfreq; // lock current frequency, do not change it
|
||||
vmode->dbly = cfg->dbly; // double scanlines
|
||||
vmode->inter = v->inter; // interlaced (use sub-frames)
|
||||
vmode->psync = v->psync; // positive synchronization
|
||||
vmode->odd = v->odd; // first sub-frame is odd lines 1, 3, 5,... (PAL)
|
||||
|
||||
// first active scanline
|
||||
if (v->inter)
|
||||
{
|
||||
// interlaced
|
||||
vmode->vfirst1 = (vmode->vsync1 + vmode->vpost1)/2 + vmode->vback1 + 1;
|
||||
vmode->vfirst2 = vmode->vfirst1 + vmode->vact1 + vmode->vfront1 +
|
||||
(vmode->vpre1 + vmode->vsync2 + vmode->vpost2)/2 + vmode->vback2;
|
||||
}
|
||||
else
|
||||
{
|
||||
// progressive
|
||||
vmode->vfirst1 = vmode->vsync1 + vmode->vback1 + 1;
|
||||
vmode->vfirst2 = 0;
|
||||
}
|
||||
vmode->vfirst = vmode->vsync + vmode->vback + 1;
|
||||
}
|
||||
|
||||
// timings
|
||||
const sVideo* VideoResTab[DEV_MAX*RES_MAX] =
|
||||
{
|
||||
// DEV_PAL
|
||||
&VideoPALp, // RES_ZX = 0, // 256x192
|
||||
&VideoPALp, // RES_CGA, // 320x200
|
||||
&VideoPALp, // RES_QVGA, // 320x240
|
||||
&VideoPAL, // RES_EGA, // 528x400
|
||||
&VideoPAL, // RES_VGA, // 640x480
|
||||
&VideoPAL, // RES_SVGA, // 800x600 (not for TV device)
|
||||
&VideoPAL, // RES_XGA, // 1024x768 (not for TV device)
|
||||
&VideoPAL, // RES_HD, // 1280x960 (not for TV device)
|
||||
|
||||
// DEV_NTSC
|
||||
&VideoNTSCp, // RES_ZX = 0, // 256x192
|
||||
&VideoNTSCp, // RES_CGA, // 320x200
|
||||
&VideoNTSCp, // RES_QVGA, // 320x240
|
||||
&VideoNTSC, // RES_EGA, // 528x400
|
||||
&VideoNTSC, // RES_VGA, // 640x480
|
||||
&VideoNTSC, // RES_SVGA, // 800x600 (not for TV device)
|
||||
&VideoNTSC, // RES_XGA, // 1024x768 (not for TV device)
|
||||
&VideoNTSC, // RES_HD, // 1280x960 (not for TV device)
|
||||
|
||||
// DEV_VGA
|
||||
&VideoEGA, // RES_ZX = 0, // 256x192
|
||||
&VideoVGA, // RES_CGA, // 320x200
|
||||
&VideoVGA, // RES_QVGA, // 320x240
|
||||
&VideoEGA, // RES_EGA, // 528x400
|
||||
&VideoVGA, // RES_VGA, // 640x480
|
||||
&VideoSVGA, // RES_SVGA, // 800x600 (not for TV device)
|
||||
&VideoXGA, // RES_XGA, // 1024x768 (not for TV device)
|
||||
&VideoHD, // RES_HD, // 1280x960 (not for TV device)
|
||||
};
|
||||
|
||||
// required resolution width x height
|
||||
const u16 VideoResReq[RES_MAX*2] =
|
||||
{
|
||||
256, 192, // RES_ZX = 0, // 256x192
|
||||
320, 200, // RES_CGA, // 320x200
|
||||
320, 240, // RES_QVGA, // 320x240
|
||||
512, 400, // RES_EGA, // 512x400
|
||||
640, 480, // RES_VGA, // 640x480
|
||||
800, 600, // RES_SVGA, // 800x600 (not for TV device)
|
||||
1024, 768, // RES_XGA, // 1024x768 (not for TV device)
|
||||
1280, 960, // RES_HD, // 1280x960 (not for TV device)
|
||||
};
|
||||
|
||||
// initialize videomode
|
||||
// dev ... device DEV_*
|
||||
// res ... resolution RES_*
|
||||
// form ... format FORM_*
|
||||
// buf ... pointer to frame buffer (must be aligned to 4 bytes, use ALIGNED attribute)
|
||||
// buf2 ... pointer to additional buffer:
|
||||
// FORM_TILE*: pointer to column of tiles 32x32 in 8-bit graphics
|
||||
// FORM_TEXT: pointer to font 8x16 or 8x8 (size 4 KB or 2 KB, ALIGNED attribute, should be in RAM)
|
||||
// - copy font to 4KB or 2 KB RAM buffer with ALIGNED attribute
|
||||
// - text uses color attributes PC_*
|
||||
// FORM_RLE: pointer to image rows (ALIGNED attribute, should be in RAM)
|
||||
// JMH
|
||||
const sVmode* Video(u8 dev, u8 res, u8 form, u8* buf, const void* buf2 /* = FontBoldB8x16 */)
|
||||
const sVmode* Video(u8 dev, u8 res)
|
||||
{
|
||||
// stop VGA core
|
||||
// JMH
|
||||
//multicore_reset_core1();
|
||||
|
||||
// run VGA core
|
||||
// JMH
|
||||
//multicore_launch_core1(VgaCore);
|
||||
|
||||
// prepare timings structure
|
||||
if (dev >= DEV_MAX) dev = DEV_VGA;
|
||||
if (res >= RES_MAX) res = RES_MAX-1;
|
||||
if (form >= FORM_MAX) form = FORM_MAX-1;
|
||||
const sVideo* v = VideoResTab[dev*RES_MAX + res];
|
||||
|
||||
// required resolution
|
||||
|
|
@ -900,121 +372,17 @@ const sVmode* Video(u8 dev, u8 res, u8 form, u8* buf, const void* buf2 /* = Font
|
|||
u16 h = VideoResReq[res*2+1];
|
||||
if (h > v->vmax) h = v->vmax;
|
||||
|
||||
if ((form == FORM_TEXT8) || (form == FORM_MTEXT8))
|
||||
{
|
||||
w = w/8*8;
|
||||
h = h/8*8;
|
||||
}
|
||||
|
||||
if ((form == FORM_TEXT16) || (form == FORM_MTEXT16))
|
||||
{
|
||||
w = w/8*8;
|
||||
h = h/16*16;
|
||||
}
|
||||
|
||||
// setup videomode
|
||||
VgaCfgDef(&Cfg); // get default configuration
|
||||
Cfg.video = v; // video timings
|
||||
Cfg.width = w; // screen width
|
||||
Cfg.height = h; // screen height
|
||||
if (form == FORM_RLE) Cfg.mode[1] = LAYERMODE_RLE;
|
||||
Cfg.dbly = h <= v->vmax/2; // double scanlines
|
||||
VgaCfg(&Cfg, &Vmode); // calculate videomode setup
|
||||
|
||||
// initialize base layer 0
|
||||
ScreenClear(pScreen);
|
||||
sStrip* t = ScreenAddStrip(pScreen, h);
|
||||
sSegm* g = ScreenAddSegm(t, w);
|
||||
switch (form)
|
||||
{
|
||||
case FORM_8BIT: // 8-bit pixel graphics (up to EGA resolution)
|
||||
ScreenSegmGraph8(g, buf, w);
|
||||
Canvas.img = buf;
|
||||
Canvas.w = w;
|
||||
Canvas.h = h;
|
||||
Canvas.wb = w;
|
||||
Canvas.format = CANVAS_8;
|
||||
break;
|
||||
|
||||
case FORM_4BIT: // 4-bit pixel graphics (up to SVGA graphics)
|
||||
GenPal16Trans(Pal16Trans, DefPal16); // generate palette translation table
|
||||
ScreenSegmGraph4(g, buf, Pal16Trans, w/2);
|
||||
Canvas.img = buf;
|
||||
Canvas.w = w;
|
||||
Canvas.h = h;
|
||||
Canvas.wb = w/2;
|
||||
Canvas.format = CANVAS_4;
|
||||
break;
|
||||
|
||||
case FORM_MONO: // 1-bit pixel graphics
|
||||
ScreenSegmGraph1(g, buf, COL_BLACK, COL_WHITE, w/8);
|
||||
Canvas.img = buf;
|
||||
Canvas.w = w;
|
||||
Canvas.h = h;
|
||||
Canvas.wb = w/8;
|
||||
Canvas.format = CANVAS_1;
|
||||
break;
|
||||
|
||||
case FORM_TILE8: // 8x8 tiles
|
||||
ScreenSegmTile(g, buf, buf2, 8, 8, (w+7)/8);
|
||||
break;
|
||||
|
||||
case FORM_TILE12: // 12x12 tiles
|
||||
ScreenSegmTile(g, buf, buf2, 12, 12, (w+11)/12);
|
||||
break;
|
||||
|
||||
case FORM_TILE16: // 16x16 tiles
|
||||
ScreenSegmTile(g, buf, buf2, 16, 16, (w+15)/16);
|
||||
break;
|
||||
|
||||
case FORM_TILE24: // 24x24 tiles
|
||||
ScreenSegmTile(g, buf, buf2, 24, 24, (w+23)/24);
|
||||
break;
|
||||
|
||||
case FORM_TILE32: // 32x32 tiles
|
||||
ScreenSegmTile(g, buf, buf2, 32, 32, (w+31)/32);
|
||||
break;
|
||||
|
||||
case FORM_TILE48: // 48x48 tiles
|
||||
ScreenSegmTile(g, buf, buf2, 48, 48, (w+47)/48);
|
||||
break;
|
||||
|
||||
case FORM_TILE64: // 64x64 tiles
|
||||
ScreenSegmTile(g, buf, buf2, 64, 64, (w+63)/64);
|
||||
break;
|
||||
|
||||
case FORM_MTEXT8: // mono text with font 8x8
|
||||
ScreenSegmMText(g, buf, buf2, 8, COL_BLACK, COL_WHITE, w/8);
|
||||
break;
|
||||
|
||||
case FORM_MTEXT16: // mono text with font 8x16
|
||||
ScreenSegmMText(g, buf, buf2, 16, COL_BLACK, COL_WHITE, w/8);
|
||||
break;
|
||||
|
||||
case FORM_TEXT8: // attribute text with font 8x8
|
||||
ScreenSegmAText(g, buf, buf2, 8, DefPal16, w/8*2);
|
||||
break;
|
||||
|
||||
case FORM_TEXT16: // attribute text with font 8x16
|
||||
ScreenSegmAText(g, buf, buf2, 16, DefPal16, w/8*2);
|
||||
break;
|
||||
|
||||
case FORM_RLE: // images with RLE compression (on overlapped layer 1)
|
||||
ScreenSegmColor(g, 0, 0);
|
||||
LayerSetup(1, buf, &Vmode, w, h, 0, buf2);
|
||||
LayerOn(1);
|
||||
break;
|
||||
}
|
||||
|
||||
// initialize system clock
|
||||
set_sys_clock_pll(Vmode.vco*1000, Vmode.pd1, Vmode.pd2);
|
||||
|
||||
|
||||
|
||||
// initialize videomode
|
||||
// JMH
|
||||
//VgaInitReq(&Vmode);
|
||||
|
||||
return &Vmode;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,15 +1,16 @@
|
|||
|
||||
// ****************************************************************************
|
||||
//
|
||||
// VGA videomodes
|
||||
//
|
||||
// file derived from the PicoVGA project
|
||||
// https://github.com/Panda381/PicoVGA
|
||||
// by Miroslav Nemecek
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
#ifndef _VGA_VMODE_H
|
||||
#define _VGA_VMODE_H
|
||||
|
||||
#define VIDEO_NAME_LEN 5 // length of video timing name
|
||||
|
||||
// video timings
|
||||
typedef struct {
|
||||
// horizontal
|
||||
|
|
@ -23,64 +24,26 @@ typedef struct {
|
|||
u16 vtot; // total scanlines (both subframes)
|
||||
u16 vmax; // maximal height
|
||||
|
||||
// subframe 1
|
||||
u16 vsync1; // V sync (half-)pulses on subframe 1
|
||||
u16 vpost1; // V sync post half-pulses on subframe 1
|
||||
u16 vback1; // V back porch (after VSYNC, before image) on subframe 1
|
||||
u16 vact1; // active visible scanlines, subframe 1
|
||||
u16 vfront1; // V front porch (after image, before VSYNC) on subframe 1
|
||||
u16 vpre1; // V sync pre half-pulses on subframe 1
|
||||
// frame
|
||||
u16 vsync; // V sync (half-)pulses
|
||||
u16 vpost; // V sync post half-pulses
|
||||
u16 vback; // V back porch (after VSYNC, before image)
|
||||
u16 vact; // active visible scanlines
|
||||
u16 vfront; // V front porch (after image, before VSYNC)
|
||||
u16 vpre; // V sync pre half-pulses
|
||||
|
||||
// subframe 2 (ignored if not interlaced)
|
||||
u16 vsync2; // V sync half-pulses on subframe 2
|
||||
u16 vpost2; // V sync post half-pulses on subframe 2
|
||||
u16 vback2; // V back porch (after VSYNC, before image) on subframe 2
|
||||
u16 vact2; // active visible scanlines, subframe 2
|
||||
u16 vfront2; // V front porch (after image, before VSYNC) on subframe 2
|
||||
u16 vpre2; // V sync pre half-pulses on subframe 2
|
||||
|
||||
// name
|
||||
const char* name; // video timing name (VIDEO_NAME_LEN characters + terminating 0)
|
||||
|
||||
// flags
|
||||
bool inter; // interlaced (use subframes)
|
||||
bool psync; // positive synchronization
|
||||
bool odd; // first sub-frame is odd lines 1, 3, 5,... (PAL)
|
||||
} sVideo;
|
||||
|
||||
// === TV videomodes
|
||||
|
||||
// TV PAL interlaced 5:4 720x576 (4:3 768x576, 16:9 1024x576)
|
||||
extern const sVideo VideoPAL;
|
||||
|
||||
// TV PAL progressive 5:4 360x288 (4:3 384x288, 16:9 512x288)
|
||||
extern const sVideo VideoPALp;
|
||||
|
||||
// TV NTSC interlaced 4:3 640x480 (5:4 600x480, 16:9 848x480)
|
||||
extern const sVideo VideoNTSC;
|
||||
|
||||
// TV NTSC progressive 4:3 320x240 (5:4 300x240, 16:9 424x240)
|
||||
extern const sVideo VideoNTSCp;
|
||||
|
||||
// === Monitor videomodes
|
||||
|
||||
// EGA 8:5 640x400 (5:4 500x400, 4:3 528x400, 16:9 704x400), vert. 70 Hz, hor. 31.4685 kHz, pixel clock 25.175 MHz
|
||||
extern const sVideo VideoEGA;
|
||||
|
||||
// VGA 4:3 640x480 (16:9 848x480), vert. 60 Hz, hor. 31.4685 kHz, pixel clock 25.175 MHz
|
||||
extern const sVideo VideoVGA;
|
||||
|
||||
// SVGA 4:3 800x600 (16:9 1064x600), vert. 60 Hz, hor. 37.897 kHz, pixel clock 40 MHz
|
||||
extern const sVideo VideoSVGA;
|
||||
|
||||
// XGA 4:3 1024x768 (16:9 1360x768), vert. 60 Hz, hor. 48.36310 kHz, pixel clock 65 MHz
|
||||
extern const sVideo VideoXGA;
|
||||
|
||||
// VESA 4:3 1152x864, vert. 60 Hz, hor. 53.697 kHz, pixel clock 81.62 MHz
|
||||
extern const sVideo VideoVESA;
|
||||
|
||||
// HD 4:3 1280x960, vert. 53 Hz, hor. 51.858 kHz, pixel clock 102.1 MHz
|
||||
extern const sVideo VideoHD;
|
||||
|
||||
// required configuration to initialize VGA output
|
||||
typedef struct {
|
||||
|
|
@ -90,9 +53,6 @@ typedef struct {
|
|||
const sVideo* video; // used video timings
|
||||
u32 freq; // required minimal system frequency in kHz (real frequency can be higher)
|
||||
u32 fmax; // maximal system frequency in kHz (limit resolution if needed)
|
||||
u8 mode[LAYERS_MAX]; // modes of overlapped layers 0..3 LAYERMODE_* (LAYERMODE_BASE = layer is off)
|
||||
// - mode of layer 0 is ignored (always use LAYERMODE_BASE)
|
||||
// - all overlapped layers must use same layer program
|
||||
bool dbly; // double in Y direction
|
||||
bool lockfreq; // lock required frequency, do not change it
|
||||
} sVgaCfg;
|
||||
|
|
@ -116,7 +76,6 @@ typedef struct {
|
|||
u16 div; // divide base state machine clock
|
||||
u16 cpp; // state machine clocks per pixel
|
||||
u8 prog; // layer program LAYERPROG_*
|
||||
u8 mode[LAYERS_MAX]; // mode of layer 0..3 LAYERMODE_* (LAYERMODE_BASE = layer is off or base layer)
|
||||
|
||||
// horizontal timings
|
||||
u16 htot; // total state machine clocks per line
|
||||
|
|
@ -130,40 +89,24 @@ typedef struct {
|
|||
u16 vmax; // maximal height
|
||||
float vfreq; // vertical frequency in [Hz]
|
||||
|
||||
// subframe 1
|
||||
u16 vsync1; // V sync (half-)pulses on subframe 1
|
||||
u16 vpost1; // V sync post (half-)pulses on subframe 1
|
||||
u16 vback1; // V back porch (after VSYNC, before image) on subframe 1
|
||||
u16 vact1; // active visible scanlines, subframe 1
|
||||
u16 vfront1; // V front porch (after image, before VSYNC) on subframe 1
|
||||
u16 vpre1; // V sync pre (half-)pulses on subframe 1
|
||||
u16 vfirst1; // first active scanline, subframe 1
|
||||
|
||||
// subframe 2 (ignored if not interlaced)
|
||||
u16 vsync2; // V sync half-pulses on subframe 2
|
||||
u16 vpost2; // V sync post half-pulses on subframe 2
|
||||
u16 vback2; // V back porch (after VSYNC, before image) on subframe 2
|
||||
u16 vact2; // active visible scanlines, subframe 2
|
||||
u16 vfront2; // V front porch (after image, before VSYNC) on subframe 2
|
||||
u16 vpre2; // V sync pre half-pulses on subframe 2
|
||||
u16 vfirst2; // first active scanline, subframe 2
|
||||
|
||||
// name
|
||||
const char* name; // video timing name (VIDEO_NAME_LEN characters + terminating 0)
|
||||
// frame
|
||||
u16 vsync; // V sync (half-)pulses
|
||||
u16 vpost; // V sync post (half-)pulses
|
||||
u16 vback; // V back porch (after VSYNC, before image)
|
||||
u16 vact; // active visible scanlines
|
||||
u16 vfront; // V front porch (after image, before VSYNC)
|
||||
u16 vpre; // V sync pre (half-)pulses
|
||||
u16 vfirst; // first active scanline
|
||||
|
||||
// flags
|
||||
bool lockfreq; // lock current frequency, do not change it
|
||||
bool dbly; // double scanlines
|
||||
bool inter; // interlaced (use sub-frames)
|
||||
bool psync; // positive synchronization
|
||||
bool odd; // first sub-frame is odd lines 1, 3, 5,... (PAL)
|
||||
} sVmode;
|
||||
|
||||
// output device
|
||||
enum {
|
||||
DEV_PAL = 0, // PAL TV
|
||||
DEV_NTSC, // NTSC TV
|
||||
DEV_VGA, // VGA monitor
|
||||
DEV_VGA=0, // VGA monitor
|
||||
|
||||
DEV_MAX
|
||||
};
|
||||
|
|
@ -175,47 +118,17 @@ enum {
|
|||
RES_QVGA, // 320x240
|
||||
RES_EGA, // 512x400
|
||||
RES_VGA, // 640x480
|
||||
RES_SVGA, // 800x600 (not for TV device)
|
||||
RES_XGA, // 1024x768 (not for TV device)
|
||||
RES_HD, // 1280x960 (not for TV device)
|
||||
|
||||
RES_MAX
|
||||
};
|
||||
|
||||
// graphics formats
|
||||
enum {
|
||||
FORM_8BIT = 0, // 8-bit pixel graphics (up to EGA resolution)
|
||||
FORM_4BIT, // 4-bit pixel graphics (up to SVGA graphics)
|
||||
FORM_MONO, // 1-bit pixel graphics
|
||||
FORM_TILE8, // 8x8 tiles
|
||||
FORM_TILE12, // 12x12 tiles
|
||||
FORM_TILE16, // 16x16 tiles
|
||||
FORM_TILE24, // 24x24 tiles
|
||||
FORM_TILE32, // 32x32 tiles
|
||||
FORM_TILE48, // 48x48 tiles
|
||||
FORM_TILE64, // 64x64 tiles
|
||||
FORM_MTEXT8, // mono text with font 8x8
|
||||
FORM_MTEXT16, // mono text with font 8x16
|
||||
FORM_TEXT8, // attribute text with font 8x8
|
||||
FORM_TEXT16, // attribute text with font 8x16
|
||||
FORM_RLE, // images with RLE compression (on overlapped layer 1)
|
||||
|
||||
FORM_MAX
|
||||
};
|
||||
|
||||
extern sVmode Vmode; // videomode setup
|
||||
extern sVgaCfg Cfg; // required configuration
|
||||
extern sCanvas Canvas; // canvas of draw box
|
||||
|
||||
// 16-color palette translation table
|
||||
extern u16 Pal16Trans[256];
|
||||
|
||||
// initialize default VGA configuration
|
||||
void VgaCfgDef(sVgaCfg* cfg);
|
||||
|
||||
// debug print videomode setup
|
||||
void VgaPrintCfg(const sVmode* vmode);
|
||||
|
||||
// calculate videomode setup
|
||||
// cfg ... required configuration
|
||||
// vmode ... destination videomode setup for driver
|
||||
|
|
@ -224,15 +137,6 @@ void VgaCfg(const sVgaCfg* cfg, sVmode* vmode);
|
|||
// initialize videomode
|
||||
// dev ... device DEV_*
|
||||
// res ... resolution RES_*
|
||||
// form ... format FORM_*
|
||||
// buf ... pointer to frame buffer (must be aligned to 4 bytes, use ALIGNED attribute)
|
||||
// buf2 ... pointer to additional buffer:
|
||||
// FORM_TILE*: pointer to column of tiles 32x32 in 8-bit graphics
|
||||
// FORM_TEXT: pointer to font 8x16 or 8x8 (size 4 KB or 2 KB, ALIGNED attribute, should be in RAM)
|
||||
// - copy font to 4KB or 2 KB RAM buffer with ALIGNED attribute
|
||||
// - text uses color attributes PC_*
|
||||
// FORM_RLE: pointer to image rows (ALIGNED attribute, should be in RAM)
|
||||
// JMH
|
||||
const sVmode* Video(u8 dev, u8 res, u8 form, u8* buf, const void* buf2 = NULL);
|
||||
const sVmode* Video(u8 dev, u8 res);
|
||||
|
||||
#endif // _VGA_VMODE_H
|
||||
|
|
|
|||
|
|
@ -20,15 +20,15 @@ static char * digits = "0123456789";
|
|||
|
||||
static uint8_t pix = 0;
|
||||
int main(void) {
|
||||
vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// vreg_set_voltage(VREG_VOLTAGE_1_05);
|
||||
// set_sys_clock_khz(125000, true);
|
||||
// set_sys_clock_khz(150000, true);
|
||||
// set_sys_clock_khz(133000, true);
|
||||
// set_sys_clock_khz(200000, true);
|
||||
// set_sys_clock_khz(210000, true);
|
||||
set_sys_clock_khz(230000, true);
|
||||
// set_sys_clock_khz(225000, true);
|
||||
set_sys_clock_khz(252000, true);
|
||||
|
||||
|
||||
// set_sys_clock_khz(250000, true);
|
||||
stdio_init_all();
|
||||
|
||||
printf("start\n");
|
||||
|
|
@ -56,7 +56,6 @@ int main(void) {
|
|||
buf[2] = digits[r3];
|
||||
vga.drawText(4*8,8,buf,BLUE,LIGHT_BLUE,false);
|
||||
|
||||
|
||||
while (true) {
|
||||
//tft.fillScreenNoDma( pix++ );
|
||||
vga.waitSync();
|
||||
|
|
|
|||
Loading…
Reference in a new issue