Remove soft link to libdvi (copy full directory instead)
For Arduino Library Manager compliance
This commit is contained in:
parent
506fca674a
commit
bb7dc7c20d
20 changed files with 2661 additions and 3 deletions
|
|
@ -19,8 +19,7 @@ RP2040 core).
|
|||
Changes vs main PicoDVI repo:
|
||||
- Add library.properties file, src and examples directories per Arduino
|
||||
requirements.
|
||||
- software/libdvi is soft-linked into src so Arduino IDE can compile these
|
||||
parts.
|
||||
- A full copy of software/libdvi is kept in src (it was originally soft-linked, but the Arduino Library Manager does not accept soft links). If any updates are made in the original PicoDVI libdvi directory, copy them here!
|
||||
- The file dvi_serialiser.pio.h, normally not part of the distribution and
|
||||
generated during the Pico SDK build process, is provided here for Arduino
|
||||
build to work. If any changes are made in dvi_serialiser.pio (either here
|
||||
|
|
|
|||
|
|
@ -1 +0,0 @@
|
|||
../software/libdvi
|
||||
33
src/libdvi/CMakeLists.txt
Normal file
33
src/libdvi/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
# Note we are using INTERFACE so that the library can be configured per-app
|
||||
# with compile-time defines
|
||||
|
||||
add_library(libdvi INTERFACE)
|
||||
|
||||
target_sources(libdvi INTERFACE
|
||||
${CMAKE_CURRENT_LIST_DIR}/dvi.c
|
||||
${CMAKE_CURRENT_LIST_DIR}/dvi.h
|
||||
${CMAKE_CURRENT_LIST_DIR}/dvi_config_defs.h
|
||||
${CMAKE_CURRENT_LIST_DIR}/dvi_serialiser.c
|
||||
${CMAKE_CURRENT_LIST_DIR}/dvi_serialiser.h
|
||||
${CMAKE_CURRENT_LIST_DIR}/dvi_timing.c
|
||||
${CMAKE_CURRENT_LIST_DIR}/dvi_timing.h
|
||||
${CMAKE_CURRENT_LIST_DIR}/tmds_encode.S
|
||||
${CMAKE_CURRENT_LIST_DIR}/tmds_encode.c
|
||||
${CMAKE_CURRENT_LIST_DIR}/tmds_encode.h
|
||||
${CMAKE_CURRENT_LIST_DIR}/tmds_table.h
|
||||
${CMAKE_CURRENT_LIST_DIR}/tmds_table_fullres.h
|
||||
${CMAKE_CURRENT_LIST_DIR}/util_queue_u32_inline.h
|
||||
)
|
||||
|
||||
target_include_directories(libdvi INTERFACE ${CMAKE_CURRENT_LIST_DIR})
|
||||
target_link_libraries(libdvi INTERFACE
|
||||
pico_base_headers
|
||||
pico_util
|
||||
hardware_dma
|
||||
hardware_interp
|
||||
hardware_pio
|
||||
hardware_pwm
|
||||
)
|
||||
|
||||
pico_generate_pio_header(libdvi ${CMAKE_CURRENT_LIST_DIR}/dvi_serialiser.pio)
|
||||
pico_generate_pio_header(libdvi ${CMAKE_CURRENT_LIST_DIR}/tmds_encode_1bpp.pio)
|
||||
255
src/libdvi/dvi.c
Normal file
255
src/libdvi/dvi.c
Normal file
|
|
@ -0,0 +1,255 @@
|
|||
#include <stdlib.h>
|
||||
#include "hardware/dma.h"
|
||||
#include "hardware/irq.h"
|
||||
|
||||
#include "dvi.h"
|
||||
#include "dvi_timing.h"
|
||||
#include "dvi_serialiser.h"
|
||||
#include "tmds_encode.h"
|
||||
|
||||
// Adafruit PicoDVI fork requires a couple global items run-time configurable:
|
||||
uint8_t dvi_vertical_repeat = DVI_VERTICAL_REPEAT;
|
||||
bool dvi_monochrome_tmds = DVI_MONOCHROME_TMDS;
|
||||
|
||||
// Time-critical functions pulled into RAM but each in a unique section to
|
||||
// allow garbage collection
|
||||
#define __dvi_func(f) __not_in_flash_func(f)
|
||||
#define __dvi_func_x(f) __scratch_x(__STRING(f)) f
|
||||
|
||||
// We require exclusive use of a DMA IRQ line. (you wouldn't want to share
|
||||
// anyway). It's possible in theory to hook both IRQs and have two DVI outs.
|
||||
static struct dvi_inst *dma_irq_privdata[2];
|
||||
static void dvi_dma0_irq();
|
||||
static void dvi_dma1_irq();
|
||||
|
||||
// Initialise a DVI instance: timing state, PIO serialiser, per-lane DMA
// channels, the four inter-core queues, the fixed DMA control-block lists,
// and the pool of TMDS buffers.
//
// inst                  - instance to set up; inst->timing and inst->ser_cfg
//                         are read here, so the caller must fill them first.
// spinlock_tmds_queue   - spinlock number shared by the two TMDS queues.
// spinlock_colour_queue - spinlock number shared by the two colour queues.
//
// Panics if a TMDS buffer cannot be allocated.
void dvi_init(struct dvi_inst *inst, uint spinlock_tmds_queue, uint spinlock_colour_queue) {
	dvi_timing_state_init(&inst->timing_state);
	dvi_serialiser_init(&inst->ser_cfg);

	// One control channel + one data channel per TMDS lane. The control
	// channel is claimed first, then the data channel, for every lane.
	PIO pio = inst->ser_cfg.pio;
	for (int lane = 0; lane < N_TMDS_LANES; ++lane) {
		struct dvi_lane_dma_cfg *lane_cfg = &inst->dma_cfg[lane];
		uint sm = inst->ser_cfg.sm_tmds[lane];

		lane_cfg->chan_ctrl = dma_claim_unused_channel(true);
		lane_cfg->chan_data = dma_claim_unused_channel(true);
		lane_cfg->tx_fifo = (void*)&pio->txf[sm];
		lane_cfg->dreq = pio_get_dreq(pio, sm, true);
	}

	inst->late_scanline_ctr = 0;
	inst->tmds_buf_release = NULL;
	inst->tmds_buf_release_next = NULL;

	// All four queues hold pointers and are 8 entries deep.
	queue_init_with_spinlock(&inst->q_tmds_valid, sizeof(void*), 8, spinlock_tmds_queue);
	queue_init_with_spinlock(&inst->q_tmds_free, sizeof(void*), 8, spinlock_tmds_queue);
	queue_init_with_spinlock(&inst->q_colour_valid, sizeof(void*), 8, spinlock_colour_queue);
	queue_init_with_spinlock(&inst->q_colour_free, sizeof(void*), 8, spinlock_colour_queue);

	// Pre-build the DMA lists for blanking lines (with and without vsync),
	// active lines, and the fallback "error" line used when no TMDS data is
	// available.
	dvi_setup_scanline_for_vblank(inst->timing, inst->dma_cfg, true, &inst->dma_list_vblank_sync);
	dvi_setup_scanline_for_vblank(inst->timing, inst->dma_cfg, false, &inst->dma_list_vblank_nosync);
	// SRAM_BASE is only a placeholder data pointer for the active list; the
	// two variants differ solely in the pointer cast.
#if defined(ARDUINO)
	dvi_setup_scanline_for_active(inst->timing, inst->dma_cfg, (uint32_t*)SRAM_BASE, &inst->dma_list_active);
#else
	dvi_setup_scanline_for_active(inst->timing, inst->dma_cfg, (void*)SRAM_BASE, &inst->dma_list_active);
#endif
	dvi_setup_scanline_for_active(inst->timing, inst->dma_cfg, NULL, &inst->dma_list_error);

	// Allocate the TMDS buffers and hand them to the free queue. A monochrome
	// buffer holds one lane's worth of words, a colour buffer holds three.
	for (int n = 0; n < DVI_N_TMDS_BUFFERS; ++n) {
		const size_t buf_bytes = dvi_monochrome_tmds
			? inst->timing->h_active_pixels / DVI_SYMBOLS_PER_WORD * sizeof(uint32_t)
			: 3 * inst->timing->h_active_pixels / DVI_SYMBOLS_PER_WORD * sizeof(uint32_t);
		void *tmdsbuf = malloc(buf_bytes);
		if (tmdsbuf == NULL)
			panic("TMDS buffer allocation failed");
		queue_add_blocking_u32(&inst->q_tmds_free, &tmdsbuf);
	}
}
|
||||
|
||||
// The IRQs will run on whichever core calls this function (this is why it's
|
||||
// called separately from dvi_init)
|
||||
void dvi_register_irqs_this_core(struct dvi_inst *inst, uint irq_num) {
|
||||
uint32_t mask_sync_channel = 1u << inst->dma_cfg[TMDS_SYNC_LANE].chan_data;
|
||||
uint32_t mask_all_channels = 0;
|
||||
for (int i = 0; i < N_TMDS_LANES; ++i)
|
||||
mask_all_channels |= 1u << inst->dma_cfg[i].chan_ctrl | 1u << inst->dma_cfg[i].chan_data;
|
||||
|
||||
dma_hw->ints0 = mask_sync_channel;
|
||||
if (irq_num == DMA_IRQ_0) {
|
||||
hw_write_masked(&dma_hw->inte0, mask_sync_channel, mask_all_channels);
|
||||
dma_irq_privdata[0] = inst;
|
||||
irq_set_exclusive_handler(DMA_IRQ_0, dvi_dma0_irq);
|
||||
}
|
||||
else {
|
||||
hw_write_masked(&dma_hw->inte1, mask_sync_channel, mask_all_channels);
|
||||
dma_irq_privdata[1] = inst;
|
||||
irq_set_exclusive_handler(DMA_IRQ_1, dvi_dma1_irq);
|
||||
}
|
||||
irq_set_enabled(irq_num, true);
|
||||
}
|
||||
|
||||
// Set up control channels to make transfers to data channels' control
|
||||
// registers (but don't trigger the control channels -- this is done either by
|
||||
// data channel CHAIN_TO or an initial write to MULTI_CHAN_TRIGGER)
|
||||
static inline void __attribute__((always_inline)) _dvi_load_dma_op(const struct dvi_lane_dma_cfg dma_cfg[], struct dvi_scanline_dma_list *l) {
|
||||
for (int i = 0; i < N_TMDS_LANES; ++i) {
|
||||
dma_channel_config cfg = dma_channel_get_default_config(dma_cfg[i].chan_ctrl);
|
||||
channel_config_set_ring(&cfg, true, 4); // 16-byte write wrap
|
||||
channel_config_set_read_increment(&cfg, true);
|
||||
channel_config_set_write_increment(&cfg, true);
|
||||
dma_channel_configure(
|
||||
dma_cfg[i].chan_ctrl,
|
||||
&cfg,
|
||||
&dma_hw->ch[dma_cfg[i].chan_data],
|
||||
dvi_lane_from_list(l, i),
|
||||
4, // Configure all 4 registers then halt until next CHAIN_TO
|
||||
false
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Setup first set of control block lists, configure the control channels, and
|
||||
// trigger them. Control channels will subsequently be triggered only by DMA
|
||||
// CHAIN_TO on data channel completion. IRQ handler *must* be prepared before
|
||||
// calling this. (Hooked to DMA IRQ0)
|
||||
void dvi_start(struct dvi_inst *inst) {
|
||||
_dvi_load_dma_op(inst->dma_cfg, &inst->dma_list_vblank_nosync);
|
||||
dma_start_channel_mask(
|
||||
(1u << inst->dma_cfg[0].chan_ctrl) |
|
||||
(1u << inst->dma_cfg[1].chan_ctrl) |
|
||||
(1u << inst->dma_cfg[2].chan_ctrl));
|
||||
|
||||
// We really don't want the FIFOs to bottom out, so wait for full before
|
||||
// starting the shift-out.
|
||||
for (int i = 0; i < N_TMDS_LANES; ++i)
|
||||
while (!pio_sm_is_tx_fifo_full(inst->ser_cfg.pio, inst->ser_cfg.sm_tmds[i]))
|
||||
tight_loop_contents();
|
||||
dvi_serialiser_enable(&inst->ser_cfg, true);
|
||||
}
|
||||
|
||||
static inline void __dvi_func_x(_dvi_prepare_scanline_8bpp)(struct dvi_inst *inst, uint32_t *scanbuf) {
|
||||
uint32_t *tmdsbuf;
|
||||
queue_remove_blocking_u32(&inst->q_tmds_free, &tmdsbuf);
|
||||
uint pixwidth = inst->timing->h_active_pixels;
|
||||
uint words_per_channel = pixwidth / DVI_SYMBOLS_PER_WORD;
|
||||
// Scanline buffers are half-resolution; the functions take the number of *input* pixels as parameter.
|
||||
tmds_encode_data_channel_8bpp(scanbuf, tmdsbuf + 0 * words_per_channel, pixwidth / 2, DVI_8BPP_BLUE_MSB, DVI_8BPP_BLUE_LSB );
|
||||
tmds_encode_data_channel_8bpp(scanbuf, tmdsbuf + 1 * words_per_channel, pixwidth / 2, DVI_8BPP_GREEN_MSB, DVI_8BPP_GREEN_LSB);
|
||||
tmds_encode_data_channel_8bpp(scanbuf, tmdsbuf + 2 * words_per_channel, pixwidth / 2, DVI_8BPP_RED_MSB, DVI_8BPP_RED_LSB );
|
||||
queue_add_blocking_u32(&inst->q_tmds_valid, &tmdsbuf);
|
||||
}
|
||||
|
||||
static inline void __dvi_func_x(_dvi_prepare_scanline_16bpp)(struct dvi_inst *inst, uint32_t *scanbuf) {
|
||||
uint32_t *tmdsbuf;
|
||||
queue_remove_blocking_u32(&inst->q_tmds_free, &tmdsbuf);
|
||||
uint pixwidth = inst->timing->h_active_pixels;
|
||||
uint words_per_channel = pixwidth / DVI_SYMBOLS_PER_WORD;
|
||||
tmds_encode_data_channel_16bpp(scanbuf, tmdsbuf + 0 * words_per_channel, pixwidth / 2, DVI_16BPP_BLUE_MSB, DVI_16BPP_BLUE_LSB );
|
||||
tmds_encode_data_channel_16bpp(scanbuf, tmdsbuf + 1 * words_per_channel, pixwidth / 2, DVI_16BPP_GREEN_MSB, DVI_16BPP_GREEN_LSB);
|
||||
tmds_encode_data_channel_16bpp(scanbuf, tmdsbuf + 2 * words_per_channel, pixwidth / 2, DVI_16BPP_RED_MSB, DVI_16BPP_RED_LSB );
|
||||
queue_add_blocking_u32(&inst->q_tmds_valid, &tmdsbuf);
|
||||
}
|
||||
|
||||
// "Worker threads" for TMDS encoding (core enters and never returns, but still handles IRQs)
|
||||
|
||||
// Version where each record in q_colour_valid is one scanline:
|
||||
void __dvi_func(dvi_scanbuf_main_8bpp)(struct dvi_inst *inst) {
|
||||
uint y = 0;
|
||||
while (1) {
|
||||
uint32_t *scanbuf;
|
||||
queue_remove_blocking_u32(&inst->q_colour_valid, &scanbuf);
|
||||
_dvi_prepare_scanline_8bpp(inst, scanbuf);
|
||||
queue_add_blocking_u32(&inst->q_colour_free, &scanbuf);
|
||||
++y;
|
||||
if (y == inst->timing->v_active_lines) {
|
||||
y = 0;
|
||||
}
|
||||
}
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
// Ugh copy/paste but it lets us garbage collect the TMDS stuff that is not being used from .scratch_x
|
||||
void __dvi_func(dvi_scanbuf_main_16bpp)(struct dvi_inst *inst) {
|
||||
uint y = 0;
|
||||
while (1) {
|
||||
uint32_t *scanbuf;
|
||||
queue_remove_blocking_u32(&inst->q_colour_valid, &scanbuf);
|
||||
_dvi_prepare_scanline_16bpp(inst, scanbuf);
|
||||
queue_add_blocking_u32(&inst->q_colour_free, &scanbuf);
|
||||
++y;
|
||||
if (y == inst->timing->v_active_lines) {
|
||||
y = 0;
|
||||
}
|
||||
}
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
static void __dvi_func(dvi_dma_irq_handler)(struct dvi_inst *inst) {
|
||||
// Every fourth interrupt marks the start of the horizontal active region. We
|
||||
// now have until the end of this region to generate DMA blocklist for next
|
||||
// scanline.
|
||||
dvi_timing_state_advance(inst->timing, &inst->timing_state);
|
||||
if (inst->tmds_buf_release && !queue_try_add_u32(&inst->q_tmds_free, &inst->tmds_buf_release))
|
||||
panic("TMDS free queue full in IRQ!");
|
||||
inst->tmds_buf_release = inst->tmds_buf_release_next;
|
||||
inst->tmds_buf_release_next = NULL;
|
||||
|
||||
// Make sure all three channels have definitely loaded their last block
|
||||
// (should be within a few cycles of one another)
|
||||
for (int i = 0; i < N_TMDS_LANES; ++i) {
|
||||
while (dma_debug_hw->ch[inst->dma_cfg[i].chan_data].tcr != inst->timing->h_active_pixels / DVI_SYMBOLS_PER_WORD)
|
||||
tight_loop_contents();
|
||||
}
|
||||
|
||||
uint32_t *tmdsbuf;
|
||||
while (inst->late_scanline_ctr > 0 && queue_try_remove_u32(&inst->q_tmds_valid, &tmdsbuf)) {
|
||||
// If we displayed this buffer then it would be in the wrong vertical
|
||||
// position on-screen. Just pass it back.
|
||||
queue_add_blocking_u32(&inst->q_tmds_free, &tmdsbuf);
|
||||
--inst->late_scanline_ctr;
|
||||
}
|
||||
|
||||
if (inst->timing_state.v_state != DVI_STATE_ACTIVE) {
|
||||
// Don't care
|
||||
tmdsbuf = NULL;
|
||||
}
|
||||
else if (queue_try_peek_u32(&inst->q_tmds_valid, &tmdsbuf)) {
|
||||
if (inst->timing_state.v_ctr % dvi_vertical_repeat == dvi_vertical_repeat - 1) {
|
||||
queue_remove_blocking_u32(&inst->q_tmds_valid, &tmdsbuf);
|
||||
inst->tmds_buf_release_next = tmdsbuf;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// No valid scanline was ready (generates solid red scanline)
|
||||
tmdsbuf = NULL;
|
||||
if (inst->timing_state.v_ctr % dvi_vertical_repeat == dvi_vertical_repeat - 1)
|
||||
++inst->late_scanline_ctr;
|
||||
}
|
||||
|
||||
switch (inst->timing_state.v_state) {
|
||||
case DVI_STATE_ACTIVE:
|
||||
if (tmdsbuf) {
|
||||
dvi_update_scanline_data_dma(inst->timing, tmdsbuf, &inst->dma_list_active);
|
||||
_dvi_load_dma_op(inst->dma_cfg, &inst->dma_list_active);
|
||||
}
|
||||
else {
|
||||
_dvi_load_dma_op(inst->dma_cfg, &inst->dma_list_error);
|
||||
}
|
||||
if (inst->scanline_callback && inst->timing_state.v_ctr % dvi_vertical_repeat == dvi_vertical_repeat - 1) {
|
||||
inst->scanline_callback();
|
||||
}
|
||||
break;
|
||||
case DVI_STATE_SYNC:
|
||||
_dvi_load_dma_op(inst->dma_cfg, &inst->dma_list_vblank_sync);
|
||||
break;
|
||||
default:
|
||||
_dvi_load_dma_op(inst->dma_cfg, &inst->dma_list_vblank_nosync);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void __dvi_func(dvi_dma0_irq)() {
|
||||
struct dvi_inst *inst = dma_irq_privdata[0];
|
||||
dma_hw->ints0 = 1u << inst->dma_cfg[TMDS_SYNC_LANE].chan_data;
|
||||
dvi_dma_irq_handler(inst);
|
||||
}
|
||||
|
||||
static void __dvi_func(dvi_dma1_irq)() {
|
||||
struct dvi_inst *inst = dma_irq_privdata[1];
|
||||
dma_hw->ints1 = 1u << inst->dma_cfg[TMDS_SYNC_LANE].chan_data;
|
||||
dvi_dma_irq_handler(inst);
|
||||
}
|
||||
81
src/libdvi/dvi.h
Normal file
81
src/libdvi/dvi.h
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
#ifndef _DVI_H
|
||||
#define _DVI_H
|
||||
|
||||
#define N_TMDS_LANES 3
|
||||
#define TMDS_SYNC_LANE 0 // blue!
|
||||
|
||||
#include "pico/util/queue.h"
|
||||
|
||||
#include "dvi_config_defs.h"
|
||||
#include "dvi_timing.h"
|
||||
#include "dvi_serialiser.h"
|
||||
#include "util_queue_u32_inline.h"
|
||||
|
||||
typedef void (*dvi_callback_t)(void);
|
||||
|
||||
struct dvi_inst {
|
||||
// Config ---
|
||||
const struct dvi_timing *timing;
|
||||
struct dvi_lane_dma_cfg dma_cfg[N_TMDS_LANES];
|
||||
struct dvi_timing_state timing_state;
|
||||
struct dvi_serialiser_cfg ser_cfg;
|
||||
// Called in the DMA IRQ once per scanline -- careful with the run time!
|
||||
dvi_callback_t scanline_callback;
|
||||
|
||||
// State ---
|
||||
struct dvi_scanline_dma_list dma_list_vblank_sync;
|
||||
struct dvi_scanline_dma_list dma_list_vblank_nosync;
|
||||
struct dvi_scanline_dma_list dma_list_active;
|
||||
struct dvi_scanline_dma_list dma_list_error;
|
||||
|
||||
// After a TMDS buffer has been enqueued via a control block for the last
|
||||
// time, two IRQs must go by before freeing. The first indicates the control
|
||||
// block for this buf has been loaded, and the second occurs some time after
|
||||
// the actual data DMA transfer has completed.
|
||||
uint32_t *tmds_buf_release_next;
|
||||
uint32_t *tmds_buf_release;
|
||||
// Remember how far behind the source is on TMDS scanlines, so we can output
|
||||
// solid colour until they catch up (rather than dying spectacularly)
|
||||
uint late_scanline_ctr;
|
||||
|
||||
// Encoded scanlines:
|
||||
queue_t q_tmds_valid;
|
||||
queue_t q_tmds_free;
|
||||
|
||||
// Either scanline buffers or frame buffers:
|
||||
queue_t q_colour_valid;
|
||||
queue_t q_colour_free;
|
||||
|
||||
};
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
// Set up data structures and hardware for DVI.
|
||||
void dvi_init(struct dvi_inst *inst, uint spinlock_tmds_queue, uint spinlock_colour_queue);
|
||||
|
||||
// Call this after calling dvi_init(). DVI DMA interrupts will be routed to
|
||||
// whichever core called this function. Registers an exclusive IRQ handler.
|
||||
void dvi_register_irqs_this_core(struct dvi_inst *inst, uint irq_num);
|
||||
|
||||
// Start actually wiggling TMDS pairs. Call this once you have initialised the
|
||||
// DVI, have registered the IRQs, and are producing rendered scanlines.
|
||||
void dvi_start(struct dvi_inst *inst);
|
||||
|
||||
// TMDS encode worker function: core enters and doesn't leave, but still
|
||||
// responds to IRQs. Repeatedly pop a scanline buffer from q_colour_valid,
|
||||
// TMDS encode it, and pass it to the tmds valid queue.
|
||||
void dvi_scanbuf_main_8bpp(struct dvi_inst *inst);
|
||||
void dvi_scanbuf_main_16bpp(struct dvi_inst *inst);
|
||||
|
||||
// Same as above, but each q_colour_valid entry is a framebuffer
|
||||
void dvi_framebuf_main_8bpp(struct dvi_inst *inst);
|
||||
void dvi_framebuf_main_16bpp(struct dvi_inst *inst);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
151
src/libdvi/dvi_config_defs.h
Normal file
151
src/libdvi/dvi_config_defs.h
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
#ifndef _DVI_CONFIG_DEFS_H
|
||||
#define _DVI_CONFIG_DEFS_H
|
||||
|
||||
// Compile-time configuration definitions for libdvi. This file provides
|
||||
// defaults -- you can override using a board header, or setting compile
|
||||
// definitions directly from the commandline (e.g. using CMake
|
||||
// target_compile_definitions())
|
||||
|
||||
// Pull in base headers to make sure board definitions override the
|
||||
// definitions provided here. Note this file is included in asm and C.
|
||||
#include "hardware/platform_defs.h"
|
||||
#include "pico/config.h"
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// General DVI defines
|
||||
|
||||
// How many times to output the same TMDS buffer before recycling it onto the
|
||||
// free queue. Pixels are repeated vertically if this is >1.
|
||||
#ifndef DVI_VERTICAL_REPEAT
|
||||
#define DVI_VERTICAL_REPEAT 2
|
||||
#endif
|
||||
|
||||
// Number of TMDS buffers to allocate (malloc()) in DVI init. You can set this
|
||||
// to 0 if you want to allocate your own (e.g. if you want static buffers)
|
||||
#ifndef DVI_N_TMDS_BUFFERS
|
||||
#define DVI_N_TMDS_BUFFERS 3
|
||||
#endif
|
||||
|
||||
// If 1, replace the DVI serialiser with a 10n1 UART (1 start bit, 10 data
|
||||
// bits, 1 stop bit) so the stream can be dumped and analysed easily.
|
||||
#ifndef DVI_SERIAL_DEBUG
|
||||
#define DVI_SERIAL_DEBUG 0
|
||||
#endif
|
||||
|
||||
// If 1, the same TMDS symbols are sent to all 3 lanes during the horizontal
|
||||
// active period. This means only monochrome colour is available, but the TMDS
|
||||
// buffers are 3 times smaller as a result, and the performance requirements
|
||||
// for encode are also cut by 3.
|
||||
#ifndef DVI_MONOCHROME_TMDS
|
||||
#define DVI_MONOCHROME_TMDS 0
|
||||
#endif
|
||||
|
||||
// By default, we assume each 32-bit word written to a PIO FIFO contains 2x
|
||||
// 10-bit TMDS symbols, concatenated into the lower 20 bits, least-significant
|
||||
// first. This is convenient if you are generating two or more pixels at once,
|
||||
// e.g. using the pixel-doubling TMDS encode. You can change this value to 1
|
||||
// (so each word contains 1 symbol) for e.g. full resolution RGB encode. Note
|
||||
// that this value needs to divide the DVI horizontal timings, so is limited
|
||||
// to 1 or 2.
|
||||
#ifndef DVI_SYMBOLS_PER_WORD
|
||||
#define DVI_SYMBOLS_PER_WORD 2
|
||||
#endif
|
||||
|
||||
#if DVI_SYMBOLS_PER_WORD != 1 && DVI_SYMBOLS_PER_WORD !=2
|
||||
#error "Unsupported value for DVI_SYMBOLS_PER_WORD"
|
||||
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Pixel component layout
|
||||
|
||||
// By default we go R, G, B from MSB -> LSB. Override to e.g. swap RGB <-> BGR
|
||||
|
||||
// Default 8bpp layout: RGB332, {r[1:0], g[2:0], b[1:0]}
|
||||
|
||||
#ifndef DVI_8BPP_RED_MSB
|
||||
#define DVI_8BPP_RED_MSB 7
|
||||
#endif
|
||||
|
||||
#ifndef DVI_8BPP_RED_LSB
|
||||
#define DVI_8BPP_RED_LSB 5
|
||||
#endif
|
||||
|
||||
#ifndef DVI_8BPP_GREEN_MSB
|
||||
#define DVI_8BPP_GREEN_MSB 4
|
||||
#endif
|
||||
|
||||
#ifndef DVI_8BPP_GREEN_LSB
|
||||
#define DVI_8BPP_GREEN_LSB 2
|
||||
#endif
|
||||
|
||||
#ifndef DVI_8BPP_BLUE_MSB
|
||||
#define DVI_8BPP_BLUE_MSB 1
|
||||
#endif
|
||||
|
||||
#ifndef DVI_8BPP_BLUE_LSB
|
||||
#define DVI_8BPP_BLUE_LSB 0
|
||||
#endif
|
||||
|
||||
// Default 16bpp layout: RGB565, {r[4:0], g[5:0], b[4:0]}
|
||||
|
||||
#ifndef DVI_16BPP_RED_MSB
|
||||
#define DVI_16BPP_RED_MSB 15
|
||||
#endif
|
||||
|
||||
#ifndef DVI_16BPP_RED_LSB
|
||||
#define DVI_16BPP_RED_LSB 11
|
||||
#endif
|
||||
|
||||
#ifndef DVI_16BPP_GREEN_MSB
|
||||
#define DVI_16BPP_GREEN_MSB 10
|
||||
#endif
|
||||
|
||||
#ifndef DVI_16BPP_GREEN_LSB
|
||||
#define DVI_16BPP_GREEN_LSB 5
|
||||
#endif
|
||||
|
||||
#ifndef DVI_16BPP_BLUE_MSB
|
||||
#define DVI_16BPP_BLUE_MSB 4
|
||||
#endif
|
||||
|
||||
#ifndef DVI_16BPP_BLUE_LSB
|
||||
#define DVI_16BPP_BLUE_LSB 0
|
||||
#endif
|
||||
|
||||
// Default 1bpp layout: bitwise little-endian, i.e. least significant bit of
|
||||
// each word is the first (leftmost) of a block of 32 pixels.
|
||||
|
||||
// If 1, reverse the order of pixels within each byte. Order of bytes within
|
||||
// each word is still little-endian.
|
||||
#ifndef DVI_1BPP_BIT_REVERSE
|
||||
#define DVI_1BPP_BIT_REVERSE 1 // Adafruit_GFX GFXcanvas1 requires this 1
|
||||
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// TMDS encode controls
|
||||
|
||||
// Number of TMDS loop bodies between branches. cmp + branch costs 3 cycles,
|
||||
// so you can easily save 10% of encode time by bumping this. Note that body
|
||||
// will *already* produce multiple pixels, and total symbols per iteration
|
||||
// must cleanly divide symbols per scanline, else the loop won't terminate.
|
||||
// Point gun away from foot.
|
||||
#ifndef TMDS_ENCODE_UNROLL
|
||||
#define TMDS_ENCODE_UNROLL 1
|
||||
#endif
|
||||
|
||||
// If 1, don't save/restore the interpolators on full-resolution TMDS encode.
|
||||
// Speed hack. The TMDS code uses both interpolators, for each of the 3 data
|
||||
// channels, so this define avoids 6 save/restores per scanline.
|
||||
#ifndef TMDS_FULLRES_NO_INTERP_SAVE
|
||||
#define TMDS_FULLRES_NO_INTERP_SAVE 0
|
||||
#endif
|
||||
|
||||
// If 1, don't DC-balance the output of full resolution encode. Hilariously
|
||||
// noncompliant, but Dell Ultrasharp -- the honey badger of computer monitors
|
||||
// -- does not seem to mind (it helps that we DC-couple). Another speed hack,
|
||||
// useful when you are trying to get everything else up to speed.
|
||||
#ifndef TMDS_FULLRES_NO_DC_BALANCE
|
||||
#define TMDS_FULLRES_NO_DC_BALANCE 0
|
||||
#endif
|
||||
|
||||
#endif
|
||||
73
src/libdvi/dvi_serialiser.c
Normal file
73
src/libdvi/dvi_serialiser.c
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
#include "pico.h"
|
||||
#include "hardware/pio.h"
|
||||
#include "hardware/gpio.h"
|
||||
#include "hardware/pwm.h"
|
||||
#include "hardware/structs/padsbank0.h"
|
||||
|
||||
#include "dvi.h"
|
||||
#include "dvi_serialiser.h"
|
||||
#include "dvi_serialiser.pio.h"
|
||||
|
||||
// Set the pad controls for one DVI output pin and select its output polarity.
//
// gpio   - GPIO number whose pad register is modified.
// invert - true selects the inverted output override, false the normal one.
static void dvi_configure_pad(uint gpio, bool invert) {
	// 2 mA drive, enable slew rate limiting (this seems fine even at 720p30,
	// and the 3V3 LDO doesn't get warm like when turning all the GPIOs up to
	// 11). Also disable digital receiver.
	const uint32_t pad_fields =
		PADS_BANK0_GPIO0_DRIVE_BITS |
		PADS_BANK0_GPIO0_SLEWFAST_BITS |
		PADS_BANK0_GPIO0_IE_BITS;
	// Every selected field is written as zero.
	const uint32_t pad_values = (0 << PADS_BANK0_GPIO0_DRIVE_LSB);

	hw_write_masked(&padsbank0_hw->io[gpio], pad_values, pad_fields);

	if (invert)
		gpio_set_outover(gpio, GPIO_OVERRIDE_INVERT);
	else
		gpio_set_outover(gpio, GPIO_OVERRIDE_NORMAL);
}
|
||||
|
||||
void dvi_serialiser_init(struct dvi_serialiser_cfg *cfg) {
|
||||
#if DVI_SERIAL_DEBUG
|
||||
uint offset = pio_add_program(cfg->pio, &dvi_serialiser_debug_program);
|
||||
#else
|
||||
uint offset = pio_add_program(cfg->pio, &dvi_serialiser_program);
|
||||
#endif
|
||||
cfg->prog_offs = offset;
|
||||
|
||||
for (int i = 0; i < N_TMDS_LANES; ++i) {
|
||||
pio_sm_claim(cfg->pio, cfg->sm_tmds[i]);
|
||||
dvi_serialiser_program_init(
|
||||
cfg->pio,
|
||||
cfg->sm_tmds[i],
|
||||
offset,
|
||||
cfg->pins_tmds[i],
|
||||
DVI_SERIAL_DEBUG
|
||||
);
|
||||
dvi_configure_pad(cfg->pins_tmds[i], cfg->invert_diffpairs);
|
||||
dvi_configure_pad(cfg->pins_tmds[i] + 1, cfg->invert_diffpairs);
|
||||
}
|
||||
|
||||
// Use a PWM slice to drive the pixel clock. Both GPIOs must be on the same
|
||||
// slice (lower-numbered GPIO must be even).
|
||||
assert(cfg->pins_clk % 2 == 0);
|
||||
uint slice = pwm_gpio_to_slice_num(cfg->pins_clk);
|
||||
// 5 cycles high, 5 low. Invert one channel so that we get complementary outputs.
|
||||
pwm_config pwm_cfg = pwm_get_default_config();
|
||||
pwm_config_set_output_polarity(&pwm_cfg, true, false);
|
||||
pwm_config_set_wrap(&pwm_cfg, 9);
|
||||
pwm_init(slice, &pwm_cfg, false);
|
||||
pwm_set_both_levels(slice, 5, 5);
|
||||
|
||||
for (uint i = cfg->pins_clk; i <= cfg->pins_clk + 1; ++i) {
|
||||
gpio_set_function(i, GPIO_FUNC_PWM);
|
||||
dvi_configure_pad(i, cfg->invert_diffpairs);
|
||||
}
|
||||
}
|
||||
|
||||
void dvi_serialiser_enable(struct dvi_serialiser_cfg *cfg, bool enable) {
|
||||
uint mask = 0;
|
||||
for (int i = 0; i < N_TMDS_LANES; ++i)
|
||||
mask |= 1u << (cfg->sm_tmds[i] + PIO_CTRL_SM_ENABLE_LSB);
|
||||
if (enable) {
|
||||
hw_set_bits(&cfg->pio->ctrl, mask);
|
||||
pwm_set_enabled(pwm_gpio_to_slice_num(cfg->pins_clk), true);
|
||||
}
|
||||
else {
|
||||
hw_clear_bits(&cfg->pio->ctrl, mask);
|
||||
pwm_set_enabled(pwm_gpio_to_slice_num(cfg->pins_clk), false);
|
||||
}
|
||||
}
|
||||
22
src/libdvi/dvi_serialiser.h
Normal file
22
src/libdvi/dvi_serialiser.h
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
#ifndef _DVI_SERIALISER_H
|
||||
#define _DVI_SERIALISER_H
|
||||
|
||||
#include "hardware/pio.h"
|
||||
#include "dvi_config_defs.h"
|
||||
|
||||
#define N_TMDS_LANES 3
|
||||
|
||||
struct dvi_serialiser_cfg {
|
||||
PIO pio;
|
||||
uint sm_tmds[N_TMDS_LANES];
|
||||
uint pins_tmds[N_TMDS_LANES];
|
||||
uint pins_clk;
|
||||
bool invert_diffpairs;
|
||||
uint prog_offs;
|
||||
};
|
||||
|
||||
void dvi_serialiser_init(struct dvi_serialiser_cfg *cfg);
|
||||
void dvi_serialiser_enable(struct dvi_serialiser_cfg *cfg, bool enable);
|
||||
uint32_t dvi_single_to_diff(uint32_t in);
|
||||
|
||||
#endif
|
||||
53
src/libdvi/dvi_serialiser.pio
Normal file
53
src/libdvi/dvi_serialiser.pio
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
.program dvi_serialiser
|
||||
.side_set 2
|
||||
.origin 0
|
||||
|
||||
; Single-ended -> differential serial
|
||||
|
||||
out pc, 1 side 0b10
|
||||
out pc, 1 side 0b01
|
||||
|
||||
.program dvi_serialiser_debug
|
||||
.side_set 1 opt
|
||||
|
||||
; The debug variant behaves as a UART with 1 start bit, 10 data bits, 1 stop
|
||||
; bit, and 5/6ths the data throughput of the TMDS version.
|
||||
|
||||
pull ifempty side 1 ; Extend stop bit with FIFO stall
|
||||
nop side 0
|
||||
out pins, 1 ; Unrolled because we require 1 bit / clk
|
||||
out pins, 1
|
||||
out pins, 1
|
||||
out pins, 1
|
||||
out pins, 1
|
||||
out pins, 1
|
||||
out pins, 1
|
||||
out pins, 1
|
||||
out pins, 1
|
||||
out pins, 1
|
||||
|
||||
% c-sdk {
|
||||
#include "dvi_config_defs.h"
|
||||
|
||||
static inline void dvi_serialiser_program_init(PIO pio, uint sm, uint offset, uint data_pins, bool debug) {
|
||||
pio_sm_set_pins_with_mask(pio, sm, 2u << data_pins, 3u << data_pins);
|
||||
pio_sm_set_pindirs_with_mask(pio, sm, ~0u, 3u << data_pins);
|
||||
pio_gpio_init(pio, data_pins);
|
||||
pio_gpio_init(pio, data_pins + 1);
|
||||
|
||||
pio_sm_config c;
|
||||
if (debug) {
|
||||
c = dvi_serialiser_debug_program_get_default_config(offset);
|
||||
}
|
||||
else {
|
||||
c = dvi_serialiser_program_get_default_config(offset);
|
||||
}
|
||||
sm_config_set_sideset_pins(&c, data_pins);
|
||||
if (debug)
|
||||
sm_config_set_out_pins(&c, data_pins, 1);
|
||||
sm_config_set_out_shift(&c, true, !debug, 10 * DVI_SYMBOLS_PER_WORD);
|
||||
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX);
|
||||
pio_sm_init(pio, sm, offset, &c);
|
||||
pio_sm_set_enabled(pio, sm, false);
|
||||
}
|
||||
%}
|
||||
101
src/libdvi/dvi_serialiser.pio.h
Normal file
101
src/libdvi/dvi_serialiser.pio.h
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
// -------------------------------------------------- //
|
||||
// This file is autogenerated by pioasm; do not edit! //
|
||||
// -------------------------------------------------- //
|
||||
|
||||
#pragma once
|
||||
|
||||
#if !PICO_NO_HARDWARE
|
||||
#include "hardware/pio.h"
|
||||
#endif
|
||||
|
||||
// -------------- //
|
||||
// dvi_serialiser //
|
||||
// -------------- //
|
||||
|
||||
#define dvi_serialiser_wrap_target 0
|
||||
#define dvi_serialiser_wrap 1
|
||||
|
||||
static const uint16_t dvi_serialiser_program_instructions[] = {
|
||||
// .wrap_target
|
||||
0x70a1, // 0: out pc, 1 side 2
|
||||
0x68a1, // 1: out pc, 1 side 1
|
||||
// .wrap
|
||||
};
|
||||
|
||||
#if !PICO_NO_HARDWARE
|
||||
static const struct pio_program dvi_serialiser_program = {
|
||||
.instructions = dvi_serialiser_program_instructions,
|
||||
.length = 2,
|
||||
.origin = 0,
|
||||
};
|
||||
|
||||
static inline pio_sm_config dvi_serialiser_program_get_default_config(uint offset) {
|
||||
pio_sm_config c = pio_get_default_sm_config();
|
||||
sm_config_set_wrap(&c, offset + dvi_serialiser_wrap_target, offset + dvi_serialiser_wrap);
|
||||
sm_config_set_sideset(&c, 2, false, false);
|
||||
return c;
|
||||
}
|
||||
#endif
|
||||
|
||||
// -------------------- //
|
||||
// dvi_serialiser_debug //
|
||||
// -------------------- //
|
||||
|
||||
#define dvi_serialiser_debug_wrap_target 0
|
||||
#define dvi_serialiser_debug_wrap 11
|
||||
|
||||
static const uint16_t dvi_serialiser_debug_program_instructions[] = {
|
||||
// .wrap_target
|
||||
0x98e0, // 0: pull ifempty block side 1
|
||||
0xb042, // 1: nop side 0
|
||||
0x6001, // 2: out pins, 1
|
||||
0x6001, // 3: out pins, 1
|
||||
0x6001, // 4: out pins, 1
|
||||
0x6001, // 5: out pins, 1
|
||||
0x6001, // 6: out pins, 1
|
||||
0x6001, // 7: out pins, 1
|
||||
0x6001, // 8: out pins, 1
|
||||
0x6001, // 9: out pins, 1
|
||||
0x6001, // 10: out pins, 1
|
||||
0x6001, // 11: out pins, 1
|
||||
// .wrap
|
||||
};
|
||||
|
||||
#if !PICO_NO_HARDWARE
|
||||
static const struct pio_program dvi_serialiser_debug_program = {
|
||||
.instructions = dvi_serialiser_debug_program_instructions,
|
||||
.length = 12,
|
||||
.origin = -1,
|
||||
};
|
||||
|
||||
static inline pio_sm_config dvi_serialiser_debug_program_get_default_config(uint offset) {
|
||||
pio_sm_config c = pio_get_default_sm_config();
|
||||
sm_config_set_wrap(&c, offset + dvi_serialiser_debug_wrap_target, offset + dvi_serialiser_debug_wrap);
|
||||
sm_config_set_sideset(&c, 2, true, false);
|
||||
return c;
|
||||
}
|
||||
|
||||
#include "dvi_config_defs.h"
|
||||
// Configure state machine `sm` of `pio` to run one of the serialiser programs
// loaded at `offset`, driving the pin pair data_pins / data_pins + 1.
//
// debug selects dvi_serialiser_debug instead of dvi_serialiser. The state
// machine is initialised but left disabled; the caller starts it.
static inline void dvi_serialiser_program_init(PIO pio, uint sm, uint offset, uint data_pins, bool debug) {
	const uint pair_mask = 3u << data_pins;

	// Preset the pair to (high, low) on (data_pins + 1, data_pins), then make
	// both pins outputs and hand them to the PIO.
	pio_sm_set_pins_with_mask(pio, sm, 2u << data_pins, pair_mask);
	pio_sm_set_pindirs_with_mask(pio, sm, ~0u, pair_mask);
	pio_gpio_init(pio, data_pins);
	pio_gpio_init(pio, data_pins + 1);

	pio_sm_config cfg = debug
		? dvi_serialiser_debug_program_get_default_config(offset)
		: dvi_serialiser_program_get_default_config(offset);

	sm_config_set_sideset_pins(&cfg, data_pins);
	if (debug) {
		// Only the debug program uses OUT PINS, one bit at a time
		sm_config_set_out_pins(&cfg, data_pins, 1);
	}

	// Shift right; autopull only for the non-debug program (the debug program
	// issues its own PULL). Threshold is one FIFO word of 10-bit symbols.
	const bool autopull = !debug;
	sm_config_set_out_shift(&cfg, true, autopull, 10 * DVI_SYMBOLS_PER_WORD);
	sm_config_set_fifo_join(&cfg, PIO_FIFO_JOIN_TX);

	pio_sm_init(pio, sm, offset, &cfg);
	pio_sm_set_enabled(pio, sm, false);
}
|
||||
|
||||
#endif
|
||||
|
||||
324
src/libdvi/dvi_timing.c
Normal file
324
src/libdvi/dvi_timing.c
Normal file
|
|
@ -0,0 +1,324 @@
|
|||
#include "dvi.h"
|
||||
#include "dvi_timing.h"
|
||||
#include "hardware/dma.h"
|
||||
|
||||
// This file contains:
|
||||
// - Timing parameters for DVI modes (horizontal + vertical counts, best
|
||||
// achievable bit clock from 12 MHz crystal)
|
||||
// - Helper functions for generating DMA lists based on these timings
|
||||
|
||||
extern bool dvi_monochrome_tmds; // In dvi.c
|
||||
|
||||
// Pull into RAM but apply unique section suffix to allow linker GC
|
||||
#define __dvi_func(x) __not_in_flash_func(x)
|
||||
#define __dvi_const(x) __not_in_flash_func(x)
|
||||
|
||||
// VGA -- we do this mode properly, with a pretty comfortable clk_sys (252 MHz)
|
||||
const struct dvi_timing __dvi_const(dvi_timing_640x480p_60hz) = {
|
||||
.h_sync_polarity = false,
|
||||
.h_front_porch = 16,
|
||||
.h_sync_width = 96,
|
||||
.h_back_porch = 48,
|
||||
.h_active_pixels = 640,
|
||||
|
||||
.v_sync_polarity = false,
|
||||
.v_front_porch = 10,
|
||||
.v_sync_width = 2,
|
||||
.v_back_porch = 33,
|
||||
.v_active_lines = 480,
|
||||
|
||||
.bit_clk_khz = 252000
|
||||
};
|
||||
|
||||
// SVGA -- completely by-the-book but requires 400 MHz clk_sys
|
||||
const struct dvi_timing __dvi_const(dvi_timing_800x600p_60hz) = {
|
||||
.h_sync_polarity = false,
|
||||
.h_front_porch = 44,
|
||||
.h_sync_width = 128,
|
||||
.h_back_porch = 88,
|
||||
.h_active_pixels = 800,
|
||||
|
||||
.v_sync_polarity = false,
|
||||
.v_front_porch = 1,
|
||||
.v_sync_width = 4,
|
||||
.v_back_porch = 23,
|
||||
.v_active_lines = 600,
|
||||
|
||||
.bit_clk_khz = 400000
|
||||
};
|
||||
|
||||
// 800x480p 60 Hz (note this doesn't seem to be a CEA mode, I just used the
|
||||
// output of `cvt 800 480 60`), 295 MHz bit clock
|
||||
const struct dvi_timing __dvi_const(dvi_timing_800x480p_60hz) = {
|
||||
.h_sync_polarity = false,
|
||||
.h_front_porch = 24,
|
||||
.h_sync_width = 72,
|
||||
.h_back_porch = 96,
|
||||
.h_active_pixels = 800,
|
||||
|
||||
.v_sync_polarity = true,
|
||||
.v_front_porch = 3,
|
||||
.v_sync_width = 10,
|
||||
.v_back_porch = 7,
|
||||
.v_active_lines = 480,
|
||||
|
||||
.bit_clk_khz = 295200
|
||||
};
|
||||
|
||||
// SVGA reduced blanking (355 MHz bit clock) -- valid CVT mode, less common
|
||||
// than fully-blanked SVGA, but doesn't require such a high system clock
|
||||
const struct dvi_timing __dvi_const(dvi_timing_800x600p_reduced_60hz) = {
|
||||
.h_sync_polarity = true,
|
||||
.h_front_porch = 48,
|
||||
.h_sync_width = 32,
|
||||
.h_back_porch = 80,
|
||||
.h_active_pixels = 800,
|
||||
|
||||
.v_sync_polarity = false,
|
||||
.v_front_porch = 3,
|
||||
.v_sync_width = 4,
|
||||
.v_back_porch = 11,
|
||||
.v_active_lines = 600,
|
||||
|
||||
.bit_clk_khz = 354000
|
||||
};
|
||||
|
||||
// Also known as qHD; a bit uncommon, but it's a nice modest-resolution 16:9
|
||||
// aspect mode. Pixel clock 37.3 MHz
|
||||
const struct dvi_timing __dvi_const(dvi_timing_960x540p_60hz) = {
|
||||
.h_sync_polarity = true,
|
||||
.h_front_porch = 16,
|
||||
.h_sync_width = 32,
|
||||
.h_back_porch = 96,
|
||||
.h_active_pixels = 960,
|
||||
|
||||
.v_sync_polarity = true,
|
||||
.v_front_porch = 2,
|
||||
.v_sync_width = 6,
|
||||
.v_back_porch = 15,
|
||||
.v_active_lines = 540,
|
||||
|
||||
.bit_clk_khz = 372000
|
||||
};
|
||||
|
||||
// Note this is NOT the correct 720p30 CEA mode, but rather 720p60 run at half
|
||||
// pixel clock. Seems to be commonly accepted (and is a valid CVT mode). The
|
||||
// actual CEA mode is the same pixel clock as 720p60 but with >50% blanking,
|
||||
// which would require a clk_sys of 742 MHz!
|
||||
const struct dvi_timing __dvi_const(dvi_timing_1280x720p_30hz) = {
|
||||
.h_sync_polarity = true,
|
||||
.h_front_porch = 110,
|
||||
.h_sync_width = 40,
|
||||
.h_back_porch = 220,
|
||||
.h_active_pixels = 1280,
|
||||
|
||||
.v_sync_polarity = true,
|
||||
.v_front_porch = 5,
|
||||
.v_sync_width = 5,
|
||||
.v_back_porch = 20,
|
||||
.v_active_lines = 720,
|
||||
|
||||
.bit_clk_khz = 372000
|
||||
};
|
||||
|
||||
// Reduced-blanking (CVT) 720p. You aren't supposed to use reduced blanking
|
||||
// modes below 60 Hz, but I won't tell anyone (and it works on the monitors
|
||||
// I've tried). This nets a lower system clock than regular 720p30 (319 MHz)
|
||||
const struct dvi_timing __dvi_const(dvi_timing_1280x720p_reduced_30hz) = {
|
||||
.h_sync_polarity = true,
|
||||
.h_front_porch = 48,
|
||||
.h_sync_width = 32,
|
||||
.h_back_porch = 80,
|
||||
.h_active_pixels = 1280,
|
||||
|
||||
.v_sync_polarity = false,
|
||||
.v_front_porch = 3,
|
||||
.v_sync_width = 5,
|
||||
.v_back_porch = 13,
|
||||
.v_active_lines = 720,
|
||||
|
||||
.bit_clk_khz = 319200
|
||||
};
|
||||
|
||||
// This requires a spicy 488 MHz system clock and is illegal in most countries
|
||||
// (you need to have a very lucky piece of silicon to run this at 1.3 V, or
|
||||
// connect an external supply and give it a bit more juice)
|
||||
const struct dvi_timing __dvi_const(dvi_timing_1600x900p_reduced_30hz) = {
|
||||
.h_sync_polarity = true,
|
||||
.h_front_porch = 48,
|
||||
.h_sync_width = 32,
|
||||
.h_back_porch = 80,
|
||||
.h_active_pixels = 1600,
|
||||
|
||||
.v_sync_polarity = false,
|
||||
.v_front_porch = 3,
|
||||
.v_sync_width = 5,
|
||||
.v_back_porch = 18,
|
||||
.v_active_lines = 900,
|
||||
|
||||
.bit_clk_khz = 488000
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// The DMA scheme is:
|
||||
//
|
||||
// - One channel transferring data to each of the three PIO state machines
|
||||
// performing TMDS serialisation
|
||||
//
|
||||
// - One channel programming the registers of each of these data channels,
|
||||
// triggered (CHAIN_TO) each time the corresponding data channel completes
|
||||
//
|
||||
// - Lanes 1 and 2 have one block for blanking and one for video data
|
||||
//
|
||||
// - Lane 0 has one block for each horizontal region (front porch, hsync, back
|
||||
// porch, active)
|
||||
//
|
||||
// - The IRQ_QUIET flag is used to select which data block on the sync lane is
|
||||
// allowed to generate an IRQ upon completion. This is the block immediately
|
||||
// before the horizontal active region. The IRQ is entered at ~the same time
|
||||
// as the last data transfer starts
|
||||
//
|
||||
// - The IRQ points the control channels at new blocklists for next scanline.
|
||||
// The DMA starts the new list automatically at end-of-scanline, via
|
||||
// CHAIN_TO.
|
||||
//
|
||||
// The horizontal active region is the longest continuous transfer, so this
|
||||
// gives the most time to handle the IRQ and load new blocklists.
|
||||
//
|
||||
// Note a null trigger IRQ is not suitable because we get that *after* the
|
||||
// last data transfer finishes, and the FIFOs bottom out very shortly
|
||||
// afterward. For pure DVI (four blocks per scanline), it works ok to take
|
||||
// four regular IRQs per scanline and return early from 3 of them, but this
|
||||
// breaks down when you have very short scanline sections like guard bands.
|
||||
|
||||
// Each symbol appears twice, concatenated in one word. Note these must be in
|
||||
// RAM because they see a lot of DMA traffic
|
||||
const uint32_t __dvi_const(dvi_ctrl_syms)[4] = {
|
||||
0xd5354,
|
||||
0x2acab,
|
||||
0x55154,
|
||||
0xaaeab
|
||||
};
|
||||
|
||||
// Output solid red scanline if we are given NULL for tmdsbuf
|
||||
#if DVI_SYMBOLS_PER_WORD == 2
|
||||
static uint32_t __dvi_const(empty_scanline_tmds)[3] = {
|
||||
0x7fd00u, // 0x00, 0x00
|
||||
0x7fd00u, // 0x00, 0x00
|
||||
0xbfa01u // 0xfc, 0xfc
|
||||
};
|
||||
#else
|
||||
static uint32_t __attribute__((aligned(8))) __dvi_const(empty_scanline_tmds)[6] = {
|
||||
0x100u, 0x1ffu, // 0x00, 0x00
|
||||
0x100u, 0x1ffu, // 0x00, 0x00
|
||||
0x201u, 0x2feu // 0xfc, 0xfc
|
||||
};
|
||||
#endif
|
||||
|
||||
void dvi_timing_state_init(struct dvi_timing_state *t) {
|
||||
t->v_ctr = 0;
|
||||
t->v_state = DVI_STATE_FRONT_PORCH;
|
||||
};
|
||||
|
||||
void __dvi_func(dvi_timing_state_advance)(const struct dvi_timing *t, struct dvi_timing_state *s) {
|
||||
s->v_ctr++;
|
||||
if ((s->v_state == DVI_STATE_FRONT_PORCH && s->v_ctr == t->v_front_porch) ||
|
||||
(s->v_state == DVI_STATE_SYNC && s->v_ctr == t->v_sync_width) ||
|
||||
(s->v_state == DVI_STATE_BACK_PORCH && s->v_ctr == t->v_back_porch) ||
|
||||
(s->v_state == DVI_STATE_ACTIVE && s->v_ctr == t->v_active_lines)) {
|
||||
|
||||
s->v_state = (s->v_state + 1) % DVI_STATE_COUNT;
|
||||
s->v_ctr = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void dvi_scanline_dma_list_init(struct dvi_scanline_dma_list *dma_list) {
|
||||
*dma_list = (struct dvi_scanline_dma_list){};
|
||||
}
|
||||
|
||||
static const uint32_t *get_ctrl_sym(bool vsync, bool hsync) {
|
||||
return &dvi_ctrl_syms[!!vsync << 1 | !!hsync];
|
||||
}
|
||||
|
||||
// Make a sequence of paced transfers to the relevant FIFO
|
||||
// Fill in one DMA control block that makes a sequence of paced transfers from
// `read_addr` to the lane's TX FIFO.
//
// cb:             control block to populate
// dma_cfg:        channel numbers, FIFO address and DREQ for this lane
// read_addr:      source of the words to transfer
// transfer_count: number of transfers in this block
// read_ring:      ring size (in address bits) applied to the read address;
//                 0 disables wrapping, nonzero repeats a short pattern
// irq_on_finish:  true if completion of this block may raise an IRQ
static void _set_data_cb(dma_cb_t *cb, const struct dvi_lane_dma_cfg *dma_cfg,
		const void *read_addr, uint transfer_count, uint read_ring, bool irq_on_finish) {
	cb->read_addr = read_addr;
	cb->write_addr = dma_cfg->tx_fifo;
	cb->transfer_count = transfer_count;
	cb->c = dma_channel_get_default_config(dma_cfg->chan_data);
	channel_config_set_ring(&cb->c, false, read_ring);
	channel_config_set_dreq(&cb->c, dma_cfg->dreq);
	// Call back to control channel for reconfiguration:
	channel_config_set_chain_to(&cb->c, dma_cfg->chan_ctrl);
	// Note we never send a null trigger, so IRQ_QUIET is an IRQ suppression flag
	channel_config_set_irq_quiet(&cb->c, !irq_on_finish);
}
|
||||
|
||||
void dvi_setup_scanline_for_vblank(const struct dvi_timing *t, const struct dvi_lane_dma_cfg dma_cfg[],
|
||||
bool vsync_asserted, struct dvi_scanline_dma_list *l) {
|
||||
|
||||
bool vsync = t->v_sync_polarity == vsync_asserted;
|
||||
const uint32_t *sym_hsync_off = get_ctrl_sym(vsync, !t->h_sync_polarity);
|
||||
const uint32_t *sym_hsync_on = get_ctrl_sym(vsync, t->h_sync_polarity);
|
||||
const uint32_t *sym_no_sync = get_ctrl_sym(false, false );
|
||||
|
||||
dma_cb_t *synclist = dvi_lane_from_list(l, TMDS_SYNC_LANE);
|
||||
// The symbol table contains each control symbol *twice*, concatenated into 20 LSBs of table word, so we can always do word-repeat.
|
||||
_set_data_cb(&synclist[0], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_off, t->h_front_porch / DVI_SYMBOLS_PER_WORD, 2, false);
|
||||
_set_data_cb(&synclist[1], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_on, t->h_sync_width / DVI_SYMBOLS_PER_WORD, 2, false);
|
||||
_set_data_cb(&synclist[2], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_off, t->h_back_porch / DVI_SYMBOLS_PER_WORD, 2, true);
|
||||
_set_data_cb(&synclist[3], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_off, t->h_active_pixels / DVI_SYMBOLS_PER_WORD, 2, false);
|
||||
|
||||
for (int i = 0; i < N_TMDS_LANES; ++i) {
|
||||
if (i == TMDS_SYNC_LANE)
|
||||
continue;
|
||||
dma_cb_t *cblist = dvi_lane_from_list(l, i);
|
||||
_set_data_cb(&cblist[0], &dma_cfg[i], sym_no_sync,(t->h_front_porch + t->h_sync_width + t->h_back_porch) / DVI_SYMBOLS_PER_WORD, 2, false);
|
||||
_set_data_cb(&cblist[1], &dma_cfg[i], sym_no_sync, t->h_active_pixels / DVI_SYMBOLS_PER_WORD, 2, false);
|
||||
}
|
||||
}
|
||||
|
||||
void dvi_setup_scanline_for_active(const struct dvi_timing *t, const struct dvi_lane_dma_cfg dma_cfg[],
|
||||
uint32_t *tmdsbuf, struct dvi_scanline_dma_list *l) {
|
||||
|
||||
const uint32_t *sym_hsync_off = get_ctrl_sym(!t->v_sync_polarity, !t->h_sync_polarity);
|
||||
const uint32_t *sym_hsync_on = get_ctrl_sym(!t->v_sync_polarity, t->h_sync_polarity);
|
||||
const uint32_t *sym_no_sync = get_ctrl_sym(false, false );
|
||||
|
||||
dma_cb_t *synclist = dvi_lane_from_list(l, TMDS_SYNC_LANE);
|
||||
_set_data_cb(&synclist[0], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_off, t->h_front_porch / DVI_SYMBOLS_PER_WORD, 2, false);
|
||||
_set_data_cb(&synclist[1], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_on, t->h_sync_width / DVI_SYMBOLS_PER_WORD, 2, false);
|
||||
_set_data_cb(&synclist[2], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_off, t->h_back_porch / DVI_SYMBOLS_PER_WORD, 2, true);
|
||||
|
||||
for (int i = 0; i < N_TMDS_LANES; ++i) {
|
||||
dma_cb_t *cblist = dvi_lane_from_list(l, i);
|
||||
if (i != TMDS_SYNC_LANE) {
|
||||
_set_data_cb(&cblist[0], &dma_cfg[i], sym_no_sync,
|
||||
(t->h_front_porch + t->h_sync_width + t->h_back_porch) / DVI_SYMBOLS_PER_WORD, 2, false);
|
||||
}
|
||||
int target_block = i == TMDS_SYNC_LANE ? DVI_SYNC_LANE_CHUNKS - 1 : DVI_NOSYNC_LANE_CHUNKS - 1;
|
||||
if (tmdsbuf) {
|
||||
// Non-repeating DMA for the freshly-encoded TMDS buffer
|
||||
_set_data_cb(&cblist[target_block], &dma_cfg[i], tmdsbuf + i * (t->h_active_pixels / DVI_SYMBOLS_PER_WORD),
|
||||
t->h_active_pixels / DVI_SYMBOLS_PER_WORD, 0, false);
|
||||
}
|
||||
else {
|
||||
// Use read ring to repeat the correct DC-balanced symbol pair on blank scanlines (4 or 8 byte period)
|
||||
_set_data_cb(&cblist[target_block], &dma_cfg[i], &empty_scanline_tmds[2 * i / DVI_SYMBOLS_PER_WORD],
|
||||
t->h_active_pixels / DVI_SYMBOLS_PER_WORD, DVI_SYMBOLS_PER_WORD == 2 ? 2 : 3, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void __dvi_func(dvi_update_scanline_data_dma)(const struct dvi_timing *t, const uint32_t *tmdsbuf, struct dvi_scanline_dma_list *l) {
|
||||
for (int i = 0; i < N_TMDS_LANES; ++i) {
|
||||
const uint32_t *lane_tmdsbuf = dvi_monochrome_tmds ? tmdsbuf : tmdsbuf + i * t->h_active_pixels / DVI_SYMBOLS_PER_WORD;
|
||||
if (i == TMDS_SYNC_LANE)
|
||||
dvi_lane_from_list(l, i)[3].read_addr = lane_tmdsbuf;
|
||||
else
|
||||
dvi_lane_from_list(l, i)[1].read_addr = lane_tmdsbuf;
|
||||
}
|
||||
}
|
||||
|
||||
99
src/libdvi/dvi_timing.h
Normal file
99
src/libdvi/dvi_timing.h
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
#ifndef _DVI_TIMING_H
|
||||
#define _DVI_TIMING_H
|
||||
|
||||
#include "hardware/dma.h"
|
||||
#include "pico/util/queue.h"
|
||||
|
||||
#include "dvi.h"
|
||||
|
||||
struct dvi_timing {
|
||||
bool h_sync_polarity;
|
||||
uint h_front_porch;
|
||||
uint h_sync_width;
|
||||
uint h_back_porch;
|
||||
uint h_active_pixels;
|
||||
|
||||
bool v_sync_polarity;
|
||||
uint v_front_porch;
|
||||
uint v_sync_width;
|
||||
uint v_back_porch;
|
||||
uint v_active_lines;
|
||||
|
||||
uint bit_clk_khz;
|
||||
};
|
||||
|
||||
enum dvi_line_state {
|
||||
DVI_STATE_FRONT_PORCH = 0,
|
||||
DVI_STATE_SYNC,
|
||||
DVI_STATE_BACK_PORCH,
|
||||
DVI_STATE_ACTIVE,
|
||||
DVI_STATE_COUNT
|
||||
};
|
||||
|
||||
struct dvi_timing_state {
|
||||
uint v_ctr;
|
||||
enum dvi_line_state v_state;
|
||||
};
|
||||
|
||||
// This should map directly to DMA register layout, but more convenient types
|
||||
// (also this really shouldn't be here... we don't have a dma_cb in the SDK
|
||||
// because there are many valid formats due to aliases)
|
||||
typedef struct dma_cb {
|
||||
const void *read_addr;
|
||||
void *write_addr;
|
||||
uint32_t transfer_count;
|
||||
dma_channel_config c;
|
||||
} dma_cb_t;
|
||||
|
||||
static_assert(sizeof(dma_cb_t) == 4 * sizeof(uint32_t), "bad dma layout");
|
||||
static_assert(__builtin_offsetof(dma_cb_t, c.ctrl) == __builtin_offsetof(dma_channel_hw_t, ctrl_trig), "bad dma layout");
|
||||
|
||||
#define DVI_SYNC_LANE_CHUNKS DVI_STATE_COUNT
|
||||
#define DVI_NOSYNC_LANE_CHUNKS 2
|
||||
|
||||
struct dvi_scanline_dma_list {
|
||||
dma_cb_t l0[DVI_SYNC_LANE_CHUNKS];
|
||||
dma_cb_t l1[DVI_NOSYNC_LANE_CHUNKS];
|
||||
dma_cb_t l2[DVI_NOSYNC_LANE_CHUNKS];
|
||||
};
|
||||
|
||||
static inline dma_cb_t* dvi_lane_from_list(struct dvi_scanline_dma_list *l, int i) {
|
||||
return i == 0 ? l->l0 : i == 1 ? l->l1 : l->l2;
|
||||
}
|
||||
|
||||
// Each TMDS lane uses one DMA channel to transfer data to a PIO state
|
||||
// machine, and another channel to load control blocks into this channel.
|
||||
struct dvi_lane_dma_cfg {
|
||||
uint chan_ctrl;
|
||||
uint chan_data;
|
||||
void *tx_fifo;
|
||||
uint dreq;
|
||||
};
|
||||
|
||||
// Note these are already converted to pseudo-differential representation
|
||||
extern const uint32_t dvi_ctrl_syms[4];
|
||||
|
||||
// Predefined display modes, defined in dvi_timing.c
extern const struct dvi_timing dvi_timing_640x480p_60hz;
extern const struct dvi_timing dvi_timing_800x480p_60hz;
extern const struct dvi_timing dvi_timing_800x600p_60hz;
extern const struct dvi_timing dvi_timing_960x540p_60hz;
extern const struct dvi_timing dvi_timing_1280x720p_30hz;

// Reduced-blanking variants
extern const struct dvi_timing dvi_timing_800x600p_reduced_60hz;
extern const struct dvi_timing dvi_timing_1280x720p_reduced_30hz;
extern const struct dvi_timing dvi_timing_1600x900p_reduced_30hz;
|
||||
|
||||
void dvi_timing_state_init(struct dvi_timing_state *t);
|
||||
|
||||
void dvi_timing_state_advance(const struct dvi_timing *t, struct dvi_timing_state *s);
|
||||
|
||||
void dvi_scanline_dma_list_init(struct dvi_scanline_dma_list *dma_list);
|
||||
|
||||
void dvi_setup_scanline_for_vblank(const struct dvi_timing *t, const struct dvi_lane_dma_cfg dma_cfg[],
|
||||
bool vsync_asserted, struct dvi_scanline_dma_list *l);
|
||||
|
||||
void dvi_setup_scanline_for_active(const struct dvi_timing *t, const struct dvi_lane_dma_cfg dma_cfg[],
|
||||
uint32_t *tmdsbuf, struct dvi_scanline_dma_list *l);
|
||||
|
||||
void dvi_update_scanline_data_dma(const struct dvi_timing *t, const uint32_t *tmdsbuf, struct dvi_scanline_dma_list *l);
|
||||
|
||||
#endif
|
||||
623
src/libdvi/tmds_encode.S
Normal file
623
src/libdvi/tmds_encode.S
Normal file
|
|
@ -0,0 +1,623 @@
|
|||
#include "hardware/regs/addressmap.h"
|
||||
#include "hardware/regs/sio.h"
|
||||
#include "dvi_config_defs.h"
|
||||
|
||||
// Offsets suitable for ldr/str (must be <= 0x7c):
|
||||
#define ACCUM0_OFFS (SIO_INTERP0_ACCUM0_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
|
||||
#define ACCUM1_OFFS (SIO_INTERP0_ACCUM1_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
|
||||
#define ACCUM1_ADD_OFFS (SIO_INTERP0_ACCUM1_ADD_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
|
||||
#define PEEK0_OFFS (SIO_INTERP0_PEEK_LANE0_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
|
||||
#define PEEK1_OFFS (SIO_INTERP0_PEEK_LANE1_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
|
||||
#define PEEK2_OFFS (SIO_INTERP0_PEEK_FULL_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
|
||||
#define INTERP1 (SIO_INTERP1_ACCUM0_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
|
||||
// Note the entirety of INTERP0 and INTERP1 fits inside this 5-bit
|
||||
// word-addressed space... almost as though it were intentional! :)
|
||||
|
||||
.syntax unified
|
||||
.cpu cortex-m0plus
|
||||
.thumb
|
||||
|
||||
.macro decl_func_x name
|
||||
.section .scratch_x.\name, "ax"
|
||||
.global \name
|
||||
.type \name,%function
|
||||
.thumb_func
|
||||
\name:
|
||||
.endm
|
||||
|
||||
.macro decl_func_y name
|
||||
.section .scratch_y.\name, "ax"
|
||||
.global \name
|
||||
.type \name,%function
|
||||
.thumb_func
|
||||
\name:
|
||||
.endm
|
||||
|
||||
#define decl_func decl_func_x
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Pixel-doubling encoders for RGB
|
||||
|
||||
// r0: Input buffer (word-aligned)
|
||||
// r1: Output buffer (word-aligned)
|
||||
// r2: Input size (pixels)
|
||||
|
||||
// Turn one input word into two output words using the interpolator at
// \r_ibase. \r_inout0 holds the input word on entry and the first output
// word on exit; \r_out1 receives the second output word.
.macro do_channel_16bpp r_ibase r_inout0 r_out1
	// Feed the input word to the interpolator
	str \r_inout0, [\r_ibase, #ACCUM0_OFFS]
	// Each PEEK result is used as an address, and the word there is the output
	ldr \r_inout0, [\r_ibase, #PEEK0_OFFS]
	ldr \r_inout0, [\r_inout0]
	ldr \r_out1, [\r_ibase, #PEEK1_OFFS]
	ldr \r_out1, [\r_out1]
.endm
|
||||
|
||||
// Each pass of the unrolled body reads two input words and writes four output
// words (two per input word, via do_channel_16bpp).
decl_func tmds_encode_loop_16bpp
	push {r4, r5, r6, r7, lr}
	// ip = r1 + 4 * r2: output end pointer
	lsls r2, #2
	add r2, r1
	mov ip, r2
	// r2 is reused as the interpolator register base from here on
	ldr r2, =(SIO_BASE + SIO_INTERP0_ACCUM0_OFFSET)
	// Enter at the loop test, so nothing is written when r1 == ip already
	b 2f
.align 2
1:
.rept TMDS_ENCODE_UNROLL
	ldmia r0!, {r4, r6}
	do_channel_16bpp r2, r4, r5
	do_channel_16bpp r2, r6, r7
	stmia r1!, {r4, r5, r6, r7}
.endr
2:
	// The exit test is for equality only, so the output length must be a whole
	// number of unrolled passes (16 bytes * TMDS_ENCODE_UNROLL each)
	cmp r1, ip
	bne 1b
	pop {r4, r5, r6, r7, pc}
|
||||
|
||||
// Same as above, but scale data to make up for lack of left shift
|
||||
// in interpolator (costs 1 cycle per 2 pixels)
|
||||
//
|
||||
// r0: Input buffer (word-aligned)
|
||||
// r1: Output buffer (word-aligned)
|
||||
// r2: Input size (pixels)
|
||||
// r3: Left shift amount
|
||||
|
||||
// Same loop structure as tmds_encode_loop_16bpp, with each input word shifted
// left by r3 before it is handed to the interpolator.
decl_func tmds_encode_loop_16bpp_leftshift
	push {r4, r5, r6, r7, lr}
	// ip = r1 + 4 * r2: output end pointer
	lsls r2, #2
	add r2, r1
	mov ip, r2
	// r2 is reused as the interpolator register base from here on
	ldr r2, =(SIO_BASE + SIO_INTERP0_ACCUM0_OFFSET)
	// Enter at the loop test, so nothing is written when r1 == ip already
	b 2f
.align 2
1:
.rept TMDS_ENCODE_UNROLL
	ldmia r0!, {r4, r6}
	lsls r4, r3
	do_channel_16bpp r2, r4, r5
	lsls r6, r3
	do_channel_16bpp r2, r6, r7
	stmia r1!, {r4, r5, r6, r7}
.endr
2:
	// Equality-only exit test: output length must be a whole number of
	// unrolled passes (16 bytes * TMDS_ENCODE_UNROLL each)
	cmp r1, ip
	bne 1b
	pop {r4, r5, r6, r7, pc}
|
||||
|
||||
// r0: Input buffer (word-aligned)
|
||||
// r1: Output buffer (word-aligned)
|
||||
// r2: Input size (pixels)
|
||||
|
||||
// Each pass of the unrolled body reads one input word, writes it to both
// interpolators, and stores four output words (PEEK0/PEEK1 lookups from
// interp0, then from interp1).
decl_func tmds_encode_loop_8bpp
	push {r4, r5, r6, r7, lr}
	// ip = r1 + 4 * r2: output end pointer
	lsls r2, #2
	add r2, r1
	mov ip, r2
	// r2 is reused as the interpolator register base from here on
	ldr r2, =(SIO_BASE + SIO_INTERP0_ACCUM0_OFFSET)
	// Enter at the loop test, so nothing is written when r1 == ip already
	b 2f
.align 2
1:
.rept TMDS_ENCODE_UNROLL
	ldmia r0!, {r4}
	// Same input word goes to interp1 and interp0
	str r4, [r2, #ACCUM0_OFFS + INTERP1]
	str r4, [r2, #ACCUM0_OFFS]
	// Each PEEK result is an address; the word loaded from it is the output
	ldr r4, [r2, #PEEK0_OFFS]
	ldr r4, [r4]
	ldr r5, [r2, #PEEK1_OFFS]
	ldr r5, [r5]
	ldr r6, [r2, #PEEK0_OFFS + INTERP1]
	ldr r6, [r6]
	ldr r7, [r2, #PEEK1_OFFS + INTERP1]
	ldr r7, [r7]
	stmia r1!, {r4, r5, r6, r7}
.endr
2:
	// Equality-only exit test: output length must be a whole number of
	// unrolled passes (16 bytes * TMDS_ENCODE_UNROLL each)
	cmp r1, ip
	bne 1b
	pop {r4, r5, r6, r7, pc}
|
||||
|
||||
// r0: Input buffer (word-aligned)
|
||||
// r1: Output buffer (word-aligned)
|
||||
// r2: Input size (pixels)
|
||||
// r3: Left shift amount
|
||||
//
|
||||
// Note that only the data written to interp0 (pixel 0, 1) is leftshifted, not
|
||||
// the data written to interp1 (pixel 2, 3). Otherwise we always lose MSBs, as
|
||||
// the LUT offset MSB is at bit 8, so pixel 0 always requires some left shift,
|
||||
// since its channel MSBs are no greater than 7.
|
||||
|
||||
// Same loop structure as tmds_encode_loop_8bpp: one input word in, four output
// words out per pass. Only the copy written to interp0 is shifted left by r3;
// interp1 receives the unshifted word.
decl_func tmds_encode_loop_8bpp_leftshift
	push {r4, r5, r6, r7, lr}
	// ip = r1 + 8 * r2: output end pointer.
	// NOTE(review): tmds_encode_loop_8bpp scales r2 by 4 (lsls #2) for the same
	// per-word output; confirm which scaling callers of this variant expect.
	lsls r2, #3
	add r2, r1
	mov ip, r2
	// r2 is reused as the interpolator register base from here on
	ldr r2, =(SIO_BASE + SIO_INTERP0_ACCUM0_OFFSET)
	// Enter at the loop test, so nothing is written when r1 == ip already
	b 2f
.align 2
1:
.rept TMDS_ENCODE_UNROLL
	ldmia r0!, {r4}
	// interp1 gets the word before the shift, interp0 after it
	str r4, [r2, #ACCUM0_OFFS + INTERP1]
	lsls r4, r3
	str r4, [r2, #ACCUM0_OFFS]
	// Each PEEK result is an address; the word loaded from it is the output
	ldr r4, [r2, #PEEK0_OFFS]
	ldr r4, [r4]
	ldr r5, [r2, #PEEK1_OFFS]
	ldr r5, [r5]
	ldr r6, [r2, #PEEK0_OFFS + INTERP1]
	ldr r6, [r6]
	ldr r7, [r2, #PEEK1_OFFS + INTERP1]
	ldr r7, [r7]
	stmia r1!, {r4, r5, r6, r7}
.endr
2:
	// Equality-only exit test: output length must be a whole number of
	// unrolled passes (16 bytes * TMDS_ENCODE_UNROLL each)
	cmp r1, ip
	bne 1b
	pop {r4, r5, r6, r7, pc}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Fast 1bpp black/white encoder (full res)
|
||||
|
||||
// Taking the encoder from DVI spec, with initial balance 0:
|
||||
//
|
||||
// - Encoding either 0x00 or 0xff will produce a running balance of -8, with
|
||||
// output symbol of 0x100 or 0x200
|
||||
//
|
||||
// - Subsequently encoding either 0x01 or 0xfe will return the balance to 0, with
|
||||
// output symbol of 0x1ff or 0x2ff
|
||||
//
|
||||
// So we can do 1bpp encode with a lookup of x coordinate LSB, and input
|
||||
// colour bit. If we process pixels in even-sized blocks, only the colour
|
||||
// lookup is needed.
|
||||
|
||||
// Encode 8 pixels @ 1bpp (using two table lookups)
|
||||
// r3 contains lookup mask (preshifted)
|
||||
// r8 contains pointer to encode table
|
||||
// 2.125 cyc/pix
|
||||
// Two lookups, each producing a two-word table entry, stored as four words.
// For each lookup: shift the wanted bits of r2 into the mask position, mask
// with r3 to get a byte offset, add the table base in r8, load the entry.
// The shift instruction and amount for each lookup are macro arguments.
.macro tmds_encode_1bpp_body shift_instr0 shamt0 shift_instr1 shamt1
	// First lookup -> r4, r5
	\shift_instr0 r4, r2, #\shamt0
	ands r4, r3
	add r4, r8
	ldmia r4, {r4, r5}
	// Second lookup -> r6, r7
	\shift_instr1 r6, r2, #\shamt1
	ands r6, r3
	add r6, r8
	ldmia r6, {r6, r7}
	stmia r1!, {r4, r5, r6, r7}
.endm
|
||||
|
||||
// r0: input buffer (word-aligned)
|
||||
// r1: output buffer (word-aligned)
|
||||
// r2: output pixel count
|
||||
// Each loop pass consumes one input word and runs four tmds_encode_1bpp_body
// expansions, i.e. eight table lookups and sixteen output words.
decl_func tmds_encode_1bpp
	push {r4-r7, lr}
	// r8 is used for the table pointer, so save the caller's value
	mov r7, r8
	push {r7}
	// ip = r1 + 2 * r2: output end pointer
	lsls r2, #1
	add r2, r1
	mov ip, r2
	adr r4, tmds_1bpp_table
	mov r8, r4
	// Mask: 4 bit index, 8 bytes per entry
	movs r3, #0x78
	// Enter at the loop test
	b 2f
1:
	// r2 now holds the input word
	ldmia r0!, {r2}
	// The shift amounts step through the word four bits at a time; the two
	// configurations differ in the order of the lookups within each body and
	// in which table is assembled below.
#if !DVI_1BPP_BIT_REVERSE
	tmds_encode_1bpp_body lsls 3 lsrs 1
	tmds_encode_1bpp_body lsrs 5 lsrs 9
	tmds_encode_1bpp_body lsrs 13 lsrs 17
	tmds_encode_1bpp_body lsrs 21 lsrs 25
#else
	tmds_encode_1bpp_body lsrs 1 lsls 3
	tmds_encode_1bpp_body lsrs 9 lsrs 5
	tmds_encode_1bpp_body lsrs 17 lsrs 13
	tmds_encode_1bpp_body lsrs 25 lsrs 21
#endif
2:
	// Unsigned "lower than" test: the loop stops once r1 reaches or passes ip
	cmp r1, ip
	blo 1b

	// Restore the caller's r8
	pop {r7}
	mov r8, r7
	pop {r4-r7, pc}
|
||||
|
||||
.align 2
|
||||
tmds_1bpp_table:
|
||||
#if !DVI_1BPP_BIT_REVERSE
|
||||
.word 0x7fd00, 0x7fd00 // 0000
|
||||
.word 0x7fe00, 0x7fd00 // 0001
|
||||
.word 0xbfd00, 0x7fd00 // 0010
|
||||
.word 0xbfe00, 0x7fd00 // 0011
|
||||
.word 0x7fd00, 0x7fe00 // 0100
|
||||
.word 0x7fe00, 0x7fe00 // 0101
|
||||
.word 0xbfd00, 0x7fe00 // 0110
|
||||
.word 0xbfe00, 0x7fe00 // 0111
|
||||
.word 0x7fd00, 0xbfd00 // 1000
|
||||
.word 0x7fe00, 0xbfd00 // 1001
|
||||
.word 0xbfd00, 0xbfd00 // 1010
|
||||
.word 0xbfe00, 0xbfd00 // 1011
|
||||
.word 0x7fd00, 0xbfe00 // 1100
|
||||
.word 0x7fe00, 0xbfe00 // 1101
|
||||
.word 0xbfd00, 0xbfe00 // 1110
|
||||
.word 0xbfe00, 0xbfe00 // 1111
|
||||
#else
|
||||
.word 0x7fd00, 0x7fd00 // 0000
|
||||
.word 0x7fd00, 0xbfd00 // 1000
|
||||
.word 0x7fd00, 0x7fe00 // 0100
|
||||
.word 0x7fd00, 0xbfe00 // 1100
|
||||
.word 0xbfd00, 0x7fd00 // 0010
|
||||
.word 0xbfd00, 0xbfd00 // 1010
|
||||
.word 0xbfd00, 0x7fe00 // 0110
|
||||
.word 0xbfd00, 0xbfe00 // 1110
|
||||
.word 0x7fe00, 0x7fd00 // 0001
|
||||
.word 0x7fe00, 0xbfd00 // 1001
|
||||
.word 0x7fe00, 0x7fe00 // 0101
|
||||
.word 0x7fe00, 0xbfe00 // 1101
|
||||
.word 0xbfe00, 0x7fd00 // 0011
|
||||
.word 0xbfe00, 0xbfd00 // 1011
|
||||
.word 0xbfe00, 0x7fe00 // 0111
|
||||
.word 0xbfe00, 0xbfe00 // 1111
|
||||
#endif
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Full-resolution 2bpp encode (for 2bpp grayscale, or bitplaned RGB222)
|
||||
|
||||
// Even-x-position pixels are encoded as symbols with imbalance -4, and odd
|
||||
// pixels with +4, so that we can mix-and-match our even/odd codewords and
|
||||
// always get a properly balanced sequence:
|
||||
//
|
||||
// level 0: (05 -> 103), then (04 -> 1fc) (decimal 5, 4)
|
||||
// level 1: (50 -> 130), then (51 -> 1cf) (decimal 80, 81)
|
||||
// level 2: (af -> 230), then (ae -> 2cf) (decimal 175, 174)
|
||||
// level 3: (fa -> 203), then (fb -> 2fc) (decimal 250, 251)
|
||||
//
|
||||
// These correspond to roughly 255 times (0, 1/3, 2/3, 1).
|
||||
//
|
||||
// Alternatively we could use symbols with 0 balance, which results in lower
|
||||
// contrast but avoids the LSB bobble:
|
||||
//
|
||||
// level 0: (10 -> 1f0) always
|
||||
// level 1: (5a -> 263) always
|
||||
// level 2: (a5 -> 163) always
|
||||
// level 3: (ef -> 2f0) always
|
||||
|
||||
// Table base pointer in r0. Input pixels in r2.
|
||||
// One table lookup: shift the wanted bits of r2 into the mask position, mask
// with r3 to get a byte offset, and load the word at r0 + offset into \rd.
.macro encode_2bpp_body shift_instr shamt rd
	\shift_instr \rd, r2, #\shamt
	ands \rd, r3
	ldr \rd, [r0, \rd]
.endm
|
||||
|
||||
// r0: input buffer (word-aligned)
|
||||
// r1: output buffer (word-aligned)
|
||||
// r2: output pixel count
|
||||
// Each loop pass consumes one input word and performs eight table lookups
// (four input bits each), storing eight output words.
decl_func tmds_encode_2bpp
	push {r4-r7, lr}
	// r8 is used below, so save the caller's value
	mov r7, r8
	push {r7}
	// Input pointer moves to r8; r0 becomes the table base for the lookups
	mov r8, r0
	adr r0, tmds_2bpp_table
	// Mask: 4-bit index into 4-byte entries.
	movs r3, #0x3c
	// Limit pointer: 1 word per 2 pixels
	lsls r2, #1
	add r2, r1
	mov ip, r2
	// Enter at the loop test
	b 2f
1:
	// Fetch the next input word into r2 and write the advanced pointer back
	// to r8 (ldmia cannot use a high register as its base)
	mov r4, r8
	ldmia r4!, {r2}
	mov r8, r4
	// Lookups for input bits 3:0, 7:4, 11:8, 15:12
	encode_2bpp_body lsls 2 r4
	encode_2bpp_body lsrs 2 r5
	encode_2bpp_body lsrs 6 r6
	encode_2bpp_body lsrs 10 r7
	stmia r1!, {r4-r7}
	// Lookups for input bits 19:16, 23:20, 27:24, 31:28
	encode_2bpp_body lsrs 14 r4
	encode_2bpp_body lsrs 18 r5
	encode_2bpp_body lsrs 22 r6
	encode_2bpp_body lsrs 26 r7
	stmia r1!, {r4-r7}
2:
	// Unsigned "lower than" test: the loop stops once r1 reaches or passes ip
	cmp r1, ip
	blo 1b
	// Restore the caller's r8
	pop {r7}
	mov r8, r7
	pop {r4-r7, pc}
|
||||
|
||||
.align 2
|
||||
tmds_2bpp_table:
|
||||
.word 0x7f103 // 00, 00
|
||||
.word 0x7f130 // 01, 00
|
||||
.word 0x7f230 // 10, 00
|
||||
.word 0x7f203 // 11, 00
|
||||
.word 0x73d03 // 00, 01
|
||||
.word 0x73d30 // 01, 01
|
||||
.word 0x73e30 // 10, 01
|
||||
.word 0x73e03 // 11, 01
|
||||
.word 0xb3d03 // 00, 10
|
||||
.word 0xb3d30 // 01, 10
|
||||
.word 0xb3e30 // 10, 10
|
||||
.word 0xb3e03 // 11, 10
|
||||
.word 0xbf103 // 00, 11
|
||||
.word 0xbf130 // 01, 11
|
||||
.word 0xbf230 // 10, 11
|
||||
.word 0xbf203 // 11, 11
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Full-resolution RGB encode (not very practical)
|
||||
|
||||
// Non-doubled TMDS encode. 8.333 cycles per pixel, no exceptions. (This is
|
||||
// taking horizontal blanking (at VGA) and dual core into account, and
|
||||
// assuming the 3 channels are encoded individually.)
|
||||
//
|
||||
// Here is an idea
|
||||
// Have a table with a 7 bit lookup. The lookup is the 6 colour data bits (in
|
||||
// ACCUM0), concatenated with the sign bit of our running disparity (from
|
||||
// ACCUM1). Each table entry is a 20-bit TMDS symbol (pseudodifferential),
|
||||
// with the symbol's disparity stored left-justified in the upper 12 bits, as
|
||||
// e.g. a 6 bit signed integer.
|
||||
//
|
||||
// - Load pixel data. cyc: 0.75 (ldmia 2 words, every 4 pixels)
|
||||
// - Write pixel to ACCUM0. cyc: 1
|
||||
// - Read address from PEEK2. cyc: 1
|
||||
// - Load encoded pixel from address. cyc: 2
|
||||
// - Write disparity data to ACCUM1_ADD cyc: 1
|
||||
// - Write encoded data to output buffer. cyc: 1.25 (stmia 4 words, every 4 pixels)
|
||||
//
|
||||
// With decent register allocation we may be able to load 4 pixels at
|
||||
// once (2 words), and write 4 at once (4 words). This gives 7 cyc/pix.
|
||||
//
|
||||
// One issue is that the TMDS data in the bottom of ACCUM1 will eventually
|
||||
// overflow and affect the running disparity, but with 16 zeroes in between,
|
||||
// this would take much longer than one scanline, so everything is fine if
|
||||
// we clear the accumulator at the start of the scanline.
|
||||
//
|
||||
// Note that we need to use two interpolators to get the bits from both pixels
|
||||
// -- we are not outputting a single DC-balanced stream, but rather two
|
||||
// interleaved streams which are each DC-balanced. This is fine electrically,
|
||||
// but our output here will *NOT* match the TMDS encoder given in the DVI
|
||||
// spec.
|
||||
|
||||
// You can define TMDS_FULLRES_NO_DC_BALANCE to disable the running balance
|
||||
// feedback. With the feedback enabled (default), the output is DC balanced,
|
||||
// but there are just barely enough CPU cycles to do all the encode, so it's
|
||||
// essentially a party trick. If you disable DC balancing, the performance is
|
||||
// much better, and many monitors will still accept the signals as long as you
|
||||
// DC couple your DVI signals.
|
||||
|
||||
// Encode one input word through both interpolators.
//   \ra  in:  input pixel word
//        out: table entry fetched via the first interpolator's PEEK2 address
//   \rb  out: table entry fetched via the PEEK2 address at offset INTERP1
//   r2   interpolator register base (loaded by the enclosing loop macro)
// The same input word is written to ACCUM0 at both register offsets. Unless
// TMDS_FULLRES_NO_DC_BALANCE is set, each fetched entry is also written to the
// matching ACCUM1_ADD register so it feeds the next lookup.
// NOTE(review): INTERP1 is defined outside this excerpt; presumably it is the
// register offset from interp0 to interp1 -- confirm.
.macro tmds_fullres_encode_loop_body ra rb
	str \ra, [r2, #ACCUM0_OFFS + INTERP1]
	str \ra, [r2, #ACCUM0_OFFS]
	ldr \ra, [r2, #PEEK2_OFFS]
	ldr \ra, [\ra]
#if !TMDS_FULLRES_NO_DC_BALANCE
	str \ra, [r2, #ACCUM1_ADD_OFFS]
#endif
	ldr \rb, [r2, #PEEK2_OFFS + INTERP1]
	ldr \rb, [\rb]
#if !TMDS_FULLRES_NO_DC_BALANCE
	str \rb, [r2, #ACCUM1_ADD_OFFS + INTERP1]
#endif
.endm
|
||||
|
||||
// r0: Input buffer (word-aligned)
|
||||
// r1: Output buffer (word-aligned)
|
||||
// r2: Pixel count
|
||||
|
||||
// Full-resolution 16bpp encode loop.
//   r0: input buffer pointer (advanced two words per unrolled step)
//   r1: output buffer pointer (advanced four words per unrolled step)
//   r2: pixel count on entry; one output word is produced per pixel
// The loop is unrolled 16 times, so each pass writes 64 output words. The exit
// test is an equality compare against the end pointer, so the pixel count must
// be a multiple of 64 (a count of 0 exits immediately).
.macro tmds_fullres_encode_loop_16bpp
	push {r4-r7, lr}
	mov r4, r8
	push {r4}

	// ip = output end pointer = r1 + 4 * pixel count
	lsls r2, #2
	add r2, r1
	mov ip, r2
	// From here on r2 is the interpolator register base
	ldr r2, =(SIO_BASE + SIO_INTERP0_ACCUM0_OFFSET)
	// Running DC balance starts at 0 on every scanline
	movs r4, #0
	str r4, [r2, #ACCUM1_OFFS]
#if TMDS_FULLRES_NO_DC_BALANCE
	// Without balance feedback, give the second accumulator the opposite
	// sign so that odd and even symbols use alternate parity
	mvns r4, r4
#endif
	str r4, [r2, #ACCUM1_OFFS + INTERP1]

	// The unrolled body is too long for a conditional backward branch, so
	// the loop start address is kept in r8 and reached with bx
	adr r4, 1f
	adds r4, #1 // set the Thumb bit for bx
	mov r8, r4
	b 2f
	.align 2
1:
.rept 16
	ldmia r0!, {r4, r6}
	tmds_fullres_encode_loop_body r4 r5
	tmds_fullres_encode_loop_body r6 r7
	stmia r1!, {r4, r5, r6, r7}
.endr
2:
	cmp r1, ip
	beq 1f
	bx r8
1:
	pop {r4}
	mov r8, r4
	pop {r4-r7, pc}
.endm

// Instantiate the loop twice (one copy each in X and Y) so the two cores
// don't step on each other
decl_func_x tmds_fullres_encode_loop_16bpp_x
	tmds_fullres_encode_loop_16bpp
decl_func_y tmds_fullres_encode_loop_16bpp_y
	tmds_fullres_encode_loop_16bpp
|
||||
|
||||
|
||||
// As tmds_fullres_encode_loop_body, but the input word is shifted left by r3
// before it is written to the first interpolator's ACCUM0. The copy written at
// offset INTERP1 is the unshifted word, because that store happens before the
// shift.
//   \ra  in:  input pixel word; out: table entry from the first lookup
//   \rb  out: table entry from the lookup at offset INTERP1
//   r2   interpolator register base
//   r3   left shift amount
.macro tmds_fullres_encode_loop_body_leftshift ra rb
	// Unshifted copy goes in first; only the INTERP0 copy is shifted
	str \ra, [r2, #ACCUM0_OFFS + INTERP1]
	lsls \ra, r3
	str \ra, [r2, #ACCUM0_OFFS]
	ldr \ra, [r2, #PEEK2_OFFS]
	ldr \ra, [\ra]
#if !TMDS_FULLRES_NO_DC_BALANCE
	str \ra, [r2, #ACCUM1_ADD_OFFS]
#endif
	ldr \rb, [r2, #PEEK2_OFFS + INTERP1]
	ldr \rb, [\rb]
#if !TMDS_FULLRES_NO_DC_BALANCE
	str \rb, [r2, #ACCUM1_ADD_OFFS + INTERP1]
#endif
.endm
|
||||
|
||||
// r0: Input buffer (word-aligned)
|
||||
// r1: Output buffer (word-aligned)
|
||||
// r2: Pixel count
|
||||
// r3: Left shift amount
|
||||
|
||||
// Full-resolution 16bpp encode loop, left-shifting variant.
//   r0: input buffer pointer
//   r1: output buffer pointer
//   r2: pixel count on entry; one output word is produced per pixel
//   r3: left shift amount, applied by the loop body macro
// Each pass of the 16x unrolled loop writes 64 output words, and the exit test
// is an equality compare, so the pixel count must be a multiple of 64.
// NOTE(review): r9 is saved and restored here although nothing in this macro
// modifies it.
.macro tmds_fullres_encode_loop_16bpp_leftshift
	push {r4-r7, lr}
	mov r4, r8
	mov r5, r9
	push {r4-r5}

	// ip = output end pointer = r1 + 4 * pixel count
	lsls r2, #2
	add r2, r1
	mov ip, r2
	// From here on r2 is the interpolator register base
	ldr r2, =(SIO_BASE + SIO_INTERP0_ACCUM0_OFFSET)
	// Running DC balance starts at 0 on every scanline
	movs r4, #0
	str r4, [r2, #ACCUM1_OFFS]
#if TMDS_FULLRES_NO_DC_BALANCE
	// Without balance feedback, odd and even symbols use alternate parity
	mvns r4, r4
#endif
	str r4, [r2, #ACCUM1_OFFS + INTERP1]

	// Loop start address (with the Thumb bit set) lives in r8 for bx
	adr r4, 1f
	adds r4, #1
	mov r8, r4
	b 2f
	.align 2
1:
.rept 16 // 64 pixels per iteration
	ldmia r0!, {r4, r6}
	tmds_fullres_encode_loop_body_leftshift r4 r5
	tmds_fullres_encode_loop_body_leftshift r6 r7
	stmia r1!, {r4, r5, r6, r7}
.endr
2:
	cmp r1, ip
	beq 1f
	bx r8
1:
	pop {r4-r5}
	mov r8, r4
	mov r9, r5
	pop {r4-r7, pc}
.endm

// One instantiation per function-declaration macro (X and Y)
decl_func_x tmds_fullres_encode_loop_16bpp_leftshift_x
	tmds_fullres_encode_loop_16bpp_leftshift
decl_func_y tmds_fullres_encode_loop_16bpp_leftshift_y
	tmds_fullres_encode_loop_16bpp_leftshift
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Full-resolution 8bpp paletted encode
|
||||
|
||||
// Variant of tmds_fullres_encode_loop_16bpp that reads
|
||||
// 8-bit wide pixels packed 4 per word. The interpolator
|
||||
// base is set to a reordered list of TMDS symbols based
|
||||
// on a user colour palette.
|
||||
|
||||
// Two pixels input in rd[17:2]. Two symbols output in rd[19:0]. r2 contains
|
||||
// interp base pointer. r7 used as temporary.
|
||||
// Encode two paletted pixels into one output word.
//   \rd  in:  two pixels in bits [17:2]
//        out: two symbols in bits [19:0]; the second lookup result is shifted
//             left by 10 and ORed over the first
//   r2   interpolator register base
//   r7   scratch, clobbered
// Unless TMDS_FULLRES_NO_DC_BALANCE is set, each fetched table entry is also
// written to the matching ACCUM1_ADD register.
.macro tmds_palette_encode_loop_body rd
	str \rd, [r2, #ACCUM0_OFFS]
	str \rd, [r2, #ACCUM0_OFFS + INTERP1]
	// First pixel: fetch the entry addressed by PEEK2
	ldr \rd, [r2, #PEEK2_OFFS]
	ldr \rd, [\rd]
#if !TMDS_FULLRES_NO_DC_BALANCE
	str \rd, [r2, #ACCUM1_ADD_OFFS]
#endif
	// Second pixel: same again at offset INTERP1, via r7
	ldr r7, [r2, #PEEK2_OFFS + INTERP1]
	ldr r7, [r7]
#if !TMDS_FULLRES_NO_DC_BALANCE
	str r7, [r2, #ACCUM1_ADD_OFFS + INTERP1]
#endif
	// Pack the second symbol above the first
	lsls r7, #10
	orrs \rd, r7
.endm
|
||||
|
||||
// Full-resolution 8bpp paletted encode loop.
//   r0: input buffer pointer (8-bit pixels, four per word)
//   r1: output buffer pointer (two symbols per output word)
//   r2: pixel count on entry
// Each unrolled step reads two words (8 pixels) and writes four words. The loop
// is unrolled 10 times, so one pass handles 80 pixels; the exit test is an
// equality compare against the end pointer, so the pixel count must be a
// multiple of 80.
.macro tmds_palette_encode_loop
	push {r4-r7, lr}
	mov r4, r8
	push {r4}

	// ip = output end pointer = r1 + 2 * pixel count (bytes)
	lsls r2, #1
	add r2, r1
	mov ip, r2
	// From here on r2 is the interpolator register base
	ldr r2, =(SIO_BASE + SIO_INTERP0_ACCUM0_OFFSET)
	// Running DC balance starts at 0 on every scanline
	movs r4, #0
	str r4, [r2, #ACCUM1_OFFS]
#if TMDS_FULLRES_NO_DC_BALANCE
	// Without balance feedback, odd and even symbols use alternate parity
	mvns r4, r4
#endif
	str r4, [r2, #ACCUM1_OFFS + INTERP1]

	// The unrolled body is too long for a conditional backward branch, so
	// the loop start address is kept in r8 and reached with bx
	adr r4, 1f
	adds r4, #1 // set the Thumb bit for bx
	mov r8, r4
	b 2f
	.align 2
1:
.rept 10
	ldmia r0!, {r3, r5}
	// Split each word into two pixel pairs, each positioned at bits [17:2]
	lsrs r4, r3, #14
	lsls r3, #2
	lsrs r6, r5, #14
	lsls r5, #2
	tmds_palette_encode_loop_body r3
	tmds_palette_encode_loop_body r4
	tmds_palette_encode_loop_body r5
	tmds_palette_encode_loop_body r6
	stmia r1!, {r3, r4, r5, r6}
.endr
2:
	cmp r1, ip
	beq 1f
	bx r8
1:
	pop {r4}
	mov r8, r4
	pop {r4-r7, pc}
.endm

// One instantiation per function-declaration macro (X and Y)
decl_func_x tmds_palette_encode_loop_x
	tmds_palette_encode_loop
decl_func_y tmds_palette_encode_loop_y
	tmds_palette_encode_loop
|
||||
305
src/libdvi/tmds_encode.c
Normal file
305
src/libdvi/tmds_encode.c
Normal file
|
|
@ -0,0 +1,305 @@
|
|||
#include "hardware/interp.h"
|
||||
#include "tmds_encode.h"
|
||||
#include "hardware/gpio.h"
|
||||
#include "hardware/sync.h"
|
||||
|
||||
static const uint32_t __scratch_x("tmds_table") tmds_table[] = {
|
||||
#include "tmds_table.h"
|
||||
};
|
||||
|
||||
// Fullres table is bandwidth-critical, so gets one copy for each scratch
|
||||
// memory. There is a third copy which can go in flash, because it's just used
|
||||
// to generate palette LUTs. The ones we don't use will get garbage collected
|
||||
// during linking.
|
||||
const uint32_t __scratch_x("tmds_table_fullres_x") tmds_table_fullres_x[] = {
|
||||
#include "tmds_table_fullres.h"
|
||||
};
|
||||
|
||||
const uint32_t __scratch_y("tmds_table_fullres_y") tmds_table_fullres_y[] = {
|
||||
#include "tmds_table_fullres.h"
|
||||
};
|
||||
|
||||
// Configure an interpolator to extract a single colour channel from each of a pair
|
||||
// of pixels, with the first pixel's lsb at pixel_lsb, and the pixels being
|
||||
// pixel_width wide. Produce a LUT address for the first pixel's colour data on
|
||||
// LANE0, and the second pixel's colour data on LANE1.
|
||||
//
|
||||
// Returns nonzero if the *_leftshift variant of the encoder loop must be used
|
||||
// (needed for blue channel because I was a stubborn idiot and didn't put
|
||||
// signed/bidirectional shift on interpolator, very slightly slower). The
|
||||
// return value is the size of left shift required.
|
||||
|
||||
// Program both lanes of `interp` to turn a pair of packed pixels into LUT
// addresses for one colour channel: lane 0 handles the pixel starting at
// pixel_lsb, lane 1 (cross-input) the pixel pixel_width bits above it. Both
// lane bases are set to lutbase.
//
// Returns 0 when the hardware right shift suffices. Otherwise returns the
// number of bits the caller must left-shift the pixel data in software (the
// interpolator shift is clamped to 0 in that case).
static int __not_in_flash_func(configure_interp_for_addrgen)(interp_hw_t *interp, uint channel_msb, uint channel_lsb, uint pixel_lsb, uint pixel_width, uint lut_index_width, const uint32_t *lutbase) {
	// LUT entries are 4 bytes each, so the index is pre-scaled by two bits
	const uint index_shift = 2;
	const uint index_msb = index_shift + lut_index_width - 1;

	int right_shift = pixel_lsb + channel_msb - (lut_index_width - 1) - index_shift;
	int software_lshift = 0;
	if (right_shift < 0) {
		// The channel sits too low in the word for a right shift to
		// align it; report the deficit so the caller fixes it up
		software_lshift = -right_shift;
		right_shift = 0;
	}

	// Lane 0: first pixel of the pair
	interp_config lane0 = interp_default_config();
	interp_config_set_shift(&lane0, right_shift);
	interp_config_set_mask(&lane0, index_msb - (channel_msb - channel_lsb), index_msb);
	interp_set_config(interp, 0, &lane0);

	// Lane 1: second pixel, taken from the other lane's input
	interp_config lane1 = interp_default_config();
	interp_config_set_shift(&lane1, pixel_width + right_shift);
	interp_config_set_mask(&lane1, index_msb - (channel_msb - channel_lsb), index_msb);
	interp_config_set_cross_input(&lane1, true);
	interp_set_config(interp, 1, &lane1);

	interp->base[0] = (uint32_t)lutbase;
	interp->base[1] = (uint32_t)lutbase;

	return software_lshift;
}
|
||||
|
||||
// Extract up to 6 bits from a buffer of 16 bit pixels, and produce a buffer
|
||||
// of TMDS symbols from this colour channel. Number of pixels must be even,
|
||||
// pixel buffer must be word-aligned.
|
||||
|
||||
// Encode one colour channel (bits channel_msb..channel_lsb) of a buffer of
// 16-bit pixels into TMDS symbols. interp0 is saved on entry and restored on
// exit; the left-shifting loop is used whenever the interpolator setup asks
// for a software shift.
void __not_in_flash_func(tmds_encode_data_channel_16bpp)(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint channel_msb, uint channel_lsb) {
	interp_hw_save_t saved_interp0;
	interp_save(interp0_hw, &saved_interp0);

	const int lshift = configure_interp_for_addrgen(interp0_hw, channel_msb, channel_lsb, 0, 16, 6, tmds_table);
	if (lshift == 0) {
		tmds_encode_loop_16bpp(pixbuf, symbuf, n_pix);
	} else {
		tmds_encode_loop_16bpp_leftshift(pixbuf, symbuf, n_pix, lshift);
	}

	interp_restore(interp0_hw, &saved_interp0);
}
|
||||
|
||||
// As above, but 8 bits per pixel, multiple of 4 pixels, and still word-aligned.
|
||||
// Encode one colour channel of a buffer of 8-bit pixels into TMDS symbols.
// interp0 is configured for the pixels starting at bit 0 and interp1 for the
// pixels starting at bit 16; both are saved on entry and restored on exit.
void __not_in_flash_func(tmds_encode_data_channel_8bpp)(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint channel_msb, uint channel_lsb) {
	interp_hw_save_t saved_interp0, saved_interp1;
	interp_save(interp0_hw, &saved_interp0);
	interp_save(interp1_hw, &saved_interp1);

	// At 8bpp pixel 0 always needs some left shift (whatever the channel),
	// and that shift destroys some MSBs of pixel 3. To avoid losing them,
	// the data given to interp1 is *not* left-shifted.
	const int lshift_lower = configure_interp_for_addrgen(interp0_hw, channel_msb, channel_lsb, 0, 8, 6, tmds_table);
	const int lshift_upper = configure_interp_for_addrgen(interp1_hw, channel_msb, channel_lsb, 16, 8, 6, tmds_table);
	assert(!lshift_upper); (void)lshift_upper;

	if (lshift_lower == 0) {
		tmds_encode_loop_8bpp(pixbuf, symbuf, n_pix);
	} else {
		tmds_encode_loop_8bpp_leftshift(pixbuf, symbuf, n_pix, lshift_lower);
	}

	interp_restore(interp0_hw, &saved_interp0);
	interp_restore(interp1_hw, &saved_interp1);
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Code for full-resolution TMDS encode (barely possible, utterly impractical):
|
||||
|
||||
// Different scheme used for full res as the fun pixel-doubling DC balance
|
||||
// trick doesn't work, so we need to actually do running disparity. ACCUM0 has
|
||||
// pixel data, ACCUM1 has running disparity. INTERP0 is used to process even
|
||||
// pixels, and INTERP1 for odd pixels. Note this means that even and odd
|
||||
// symbols have their DC balance handled separately, which is not to spec.
|
||||
|
||||
// Program `interp` for the full-resolution lookup: lane 0 extracts the colour
// channel into the low bits of the LUT index, lane 1 places the sign bit of
// ACCUM1 (the running disparity) directly above it, and base[2] is set to
// lutbase.
//
// Returns 0 when the hardware right shift suffices, otherwise the number of
// bits the caller must left-shift the pixel data in software.
static int __not_in_flash_func(configure_interp_for_addrgen_fullres)(interp_hw_t *interp, uint channel_msb, uint channel_lsb, uint lut_index_width, const uint32_t *lutbase) {
	// LUT entries are 4 bytes each, so the index is pre-scaled by two bits
	const uint index_shift = 2;
	const uint index_msb = index_shift + lut_index_width - 1;

	int right_shift = channel_msb - (lut_index_width - 1) - index_shift;
	int software_lshift = 0;
	if (right_shift < 0) {
		// Channel is too low in the word; the caller shifts in software
		software_lshift = -right_shift;
		right_shift = 0;
	}

	// Lane 0: colour channel -> lower bits of the LUT index
	// (lut_index_width does not count the disparity sign bit)
	interp_config colour_lane = interp_default_config();
	interp_config_set_shift(&colour_lane, right_shift);
	interp_config_set_mask(&colour_lane, index_msb - (channel_msb - channel_lsb), index_msb);
	interp_set_config(interp, 0, &colour_lane);

	// Lane 1: a right shift of (30 - index_msb) moves bit 31 to bit
	// (index_msb + 1), and the single-bit mask keeps only that bit
	interp_config sign_lane = interp_default_config();
	interp_config_set_shift(&sign_lane, 30 - index_msb);
	interp_config_set_mask(&sign_lane, index_msb + 1, index_msb + 1);
	interp_set_config(interp, 1, &sign_lane);

	interp->base[2] = (uint32_t)lutbase;

	return software_lshift;
}
|
||||
|
||||
// Full-resolution encode of one colour channel from 16-bit pixels. interp0 is
// configured for the channel in the low halfword and interp1 for the same
// channel 16 bits higher. Unless TMDS_FULLRES_NO_INTERP_SAVE is set, both
// interpolators are saved on entry and restored on exit.
void __not_in_flash_func(tmds_encode_data_channel_fullres_16bpp)(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint channel_msb, uint channel_lsb) {
	const uint core = get_core_num();
#if !TMDS_FULLRES_NO_INTERP_SAVE
	interp_hw_save_t saved_interp0, saved_interp1;
	interp_save(interp0_hw, &saved_interp0);
	interp_save(interp1_hw, &saved_interp1);
#endif

	// The inner loop and the LUT each exist in both scratch X and scratch Y.
	// Core 1 uses the X copies and core 0 the Y copies, so the cores don't
	// tread on each other's toes too much.
	const uint32_t *lutbase = tmds_table_fullres_y;
	if (core) {
		lutbase = tmds_table_fullres_x;
	}

	const int lshift_lower = configure_interp_for_addrgen_fullres(interp0_hw, channel_msb, channel_lsb, 6, lutbase);
	const int lshift_upper = configure_interp_for_addrgen_fullres(interp1_hw, channel_msb + 16, channel_lsb + 16, 6, lutbase);
	assert(!lshift_upper); (void)lshift_upper;

	if (lshift_lower) {
		if (core) {
			tmds_fullres_encode_loop_16bpp_leftshift_x(pixbuf, symbuf, n_pix, lshift_lower);
		} else {
			tmds_fullres_encode_loop_16bpp_leftshift_y(pixbuf, symbuf, n_pix, lshift_lower);
		}
	} else {
		if (core) {
			tmds_fullres_encode_loop_16bpp_x(pixbuf, symbuf, n_pix);
		} else {
			tmds_fullres_encode_loop_16bpp_y(pixbuf, symbuf, n_pix);
		}
	}

#if !TMDS_FULLRES_NO_INTERP_SAVE
	interp_restore(interp0_hw, &saved_interp0);
	interp_restore(interp1_hw, &saved_interp1);
#endif
}
|
||||
|
||||
// (number of 1 bits) - (number of 0 bits) for every 4-bit value.
static const int8_t imbalance_lookup[16] = { -4, -2, -2, 0, -2, 0, 0, 2, -2, 0, 0, 2, 0, 2, 2, 4 };

// (number of 1 bits) - (number of 0 bits) of the low byte of x.
// Bits above bit 7 are ignored, so the table index can never go out of range.
static inline int byte_imbalance(uint32_t x)
{
	return imbalance_lookup[(x >> 4) & 0xF] + imbalance_lookup[x & 0xF];
}

// Place a signed disparity in the top 6 bits of a table word.
// The arithmetic is done in uint32_t: shifting a signed int holding 32..63
// left by 26 would overflow int, which is undefined behaviour.
static inline uint32_t tmds_pack_disparity(int disparity)
{
	return ((uint32_t)disparity & 0x3Fu) << 26;
}

// Produce the two candidate table words for one 8-bit channel value.
//   *negative_balance_sym receives the word whose disparity is <= 0
//   *positive_balance_sym receives the word whose disparity is >= 0
// Each word has the 10-bit symbol in bits [9:0] and the symbol's disparity as
// a 6-bit signed integer in bits [31:26]. When the 8 data bits of the symbol
// are already balanced, both outputs receive the same word with disparity 0.
static void tmds_encode_symbols(uint8_t pixel, uint32_t* negative_balance_sym, uint32_t* positive_balance_sym)
{
	// Stage 1: transition minimisation. XNOR chain when the input has more
	// ones than zeros (or is balanced with bit 0 clear), otherwise XOR
	// chain with bit 8 set to mark it.
	int pixel_imbalance = byte_imbalance(pixel);
	uint32_t sym = pixel & 1;
	if (pixel_imbalance > 0 || (pixel_imbalance == 0 && sym == 0)) {
		for (int i = 0; i < 7; ++i) {
			sym |= (~((sym >> i) ^ (pixel >> (i + 1))) & 1) << (i + 1);
		}
	}
	else {
		for (int i = 0; i < 7; ++i) {
			sym |= ( ((sym >> i) ^ (pixel >> (i + 1))) & 1) << (i + 1);
		}
		sym |= 0x100;
	}

	// Stage 2: build the plain and the inverted form of the symbol.
	// XOR with 0x2ff inverts the 8 data bits and sets the inversion flag
	// (bit 9).
	int imbalance = byte_imbalance(sym & 0xFF);
	if (imbalance == 0) {
		if ((sym & 0x100) == 0) sym ^= 0x2ff;
		*positive_balance_sym = sym;
		*negative_balance_sym = sym;
		return;
	}

	// Disparity of each form = data-bit imbalance + imbalance of the two
	// flag bits (looked up via bits [9:8]) + 2.
	const uint32_t inverted = sym ^ 0x2ff;
	const int plain_disparity = imbalance + imbalance_lookup[sym >> 8] + 2;
	const int inverted_disparity = -imbalance + imbalance_lookup[2 ^ (sym >> 8)] + 2;

	if (imbalance > 0) {
		*negative_balance_sym = inverted | tmds_pack_disparity(inverted_disparity);
		*positive_balance_sym = sym | tmds_pack_disparity(plain_disparity);
	}
	else {
		*negative_balance_sym = sym | tmds_pack_disparity(plain_disparity);
		*positive_balance_sym = inverted | tmds_pack_disparity(inverted_disparity);
	}
}
|
||||
|
||||
// This takes a 16-bit (RGB 565) colour palette and makes palettes of TMDS symbols suitable
|
||||
// for performing fullres encode.
|
||||
// The TMDS palette buffer should be 6 * n_palette words long.
|
||||
// n_palette must be a power of 2 <= 256.
|
||||
// Build the TMDS symbol palettes for an RGB565 colour palette.
//   palette:      n_palette RGB565 entries
//   tmds_palette: output, 6 * n_palette words. Blue occupies words
//                 [0, 2n), green [2n, 4n) and red [4n, 6n). Within each
//                 channel, entry i holds the negative-balance symbol and
//                 entry i + n_palette the positive-balance symbol.
//   n_palette:    number of palette entries
void tmds_setup_palette_symbols(const uint16_t *palette, uint32_t *tmds_palette, size_t n_palette) {
	uint32_t *const tmds_palette_blue = tmds_palette;
	uint32_t *const tmds_palette_green = tmds_palette + 2 * n_palette;
	uint32_t *const tmds_palette_red = tmds_palette + 4 * n_palette;

	// size_t index: matches the type of n_palette, no signed/unsigned compare
	for (size_t i = 0; i < n_palette; ++i) {
		const uint16_t colour = palette[i];
		// Expand each field to 8 bits, left-justified, low bits zero
		const uint8_t blue = (uint8_t)((colour << 3) & 0xf8);
		const uint8_t green = (uint8_t)((colour >> 3) & 0xfc);
		const uint8_t red = (uint8_t)((colour >> 8) & 0xf8);
		tmds_encode_symbols(blue, &tmds_palette_blue[i], &tmds_palette_blue[i + n_palette]);
		tmds_encode_symbols(green, &tmds_palette_green[i], &tmds_palette_green[i + n_palette]);
		tmds_encode_symbols(red, &tmds_palette_red[i], &tmds_palette_red[i + n_palette]);
	}
}
|
||||
|
||||
// This takes a 24-bit (RGB 888) colour palette and makes palettes of TMDS symbols suitable
|
||||
// for performing fullres encode.
|
||||
// The TMDS palette buffer should be 6 * n_palette words long.
|
||||
// n_palette must be a power of 2 <= 256.
|
||||
// Build the TMDS symbol palettes for an RGB888 colour palette (blue in bits
// [7:0], green in [15:8], red in [23:16] of each entry).
//   palette:      n_palette RGB888 entries
//   tmds_palette: output, 6 * n_palette words. Blue occupies words
//                 [0, 2n), green [2n, 4n) and red [4n, 6n). Within each
//                 channel, entry i holds the negative-balance symbol and
//                 entry i + n_palette the positive-balance symbol.
//   n_palette:    number of palette entries
void tmds_setup_palette24_symbols(const uint32_t *palette, uint32_t *tmds_palette, size_t n_palette) {
	uint32_t *const tmds_palette_blue = tmds_palette;
	uint32_t *const tmds_palette_green = tmds_palette + 2 * n_palette;
	uint32_t *const tmds_palette_red = tmds_palette + 4 * n_palette;

	// size_t index: matches the type of n_palette, no signed/unsigned compare
	for (size_t i = 0; i < n_palette; ++i) {
		const uint32_t colour = palette[i];
		const uint8_t blue = (uint8_t)(colour & 0xff);
		const uint8_t green = (uint8_t)((colour >> 8) & 0xff);
		const uint8_t red = (uint8_t)((colour >> 16) & 0xff);
		tmds_encode_symbols(blue, &tmds_palette_blue[i], &tmds_palette_blue[i + n_palette]);
		tmds_encode_symbols(green, &tmds_palette_green[i], &tmds_palette_green[i + n_palette]);
		tmds_encode_symbols(red, &tmds_palette_red[i], &tmds_palette_red[i + n_palette]);
	}
}
|
||||
|
||||
// Encode palette data for all 3 channels.
|
||||
// pixbuf is an array of n_pix 8-bit wide pixels containing palette values (32-bit word aligned)
|
||||
// tmds_palette is a palette of TMDS symbols produced by tmds_setup_palette_symbols
|
||||
// symbuf receives the symbols for the three channels at word offsets 0, n_pix / 2 and n_pix (two symbols are packed per 32-bit word), so it must hold at least 3 * n_pix / 2 words.
|
||||
void __not_in_flash_func(tmds_encode_palette_data)(const uint32_t *pixbuf, const uint32_t *tmds_palette, uint32_t *symbuf, size_t n_pix, uint32_t palette_bits) {
|
||||
uint core = get_core_num();
|
||||
#if !TMDS_FULLRES_NO_INTERP_SAVE
|
||||
interp_hw_save_t interp0_save, interp1_save;
|
||||
interp_save(interp0_hw, &interp0_save);
|
||||
interp_save(interp1_hw, &interp1_save);
|
||||
#endif
|
||||
|
||||
interp0_hw->base[2] = (uint32_t)tmds_palette;
|
||||
interp1_hw->base[2] = (uint32_t)tmds_palette;
|
||||
|
||||
// Lane 0 on both interpolators masks the palette bits, starting at bit 2,
|
||||
// The second interpolator also shifts to read the 2nd or 4th byte of the word.
|
||||
interp0_hw->ctrl[0] =
|
||||
(2 << SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB) |
|
||||
((palette_bits + 1) << SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB);
|
||||
interp1_hw->ctrl[0] =
|
||||
(8 << SIO_INTERP0_CTRL_LANE0_SHIFT_LSB) |
|
||||
(2 << SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB) |
|
||||
((palette_bits + 1) << SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB);
|
||||
|
||||
// Lane 1 shifts and masks the sign bit into the right position to add to the symbol
|
||||
// table index to choose the negative disparity symbols if the sign is negative.
|
||||
const uint32_t ctrl_lane_1 =
|
||||
((31 - (palette_bits + 2)) << SIO_INTERP0_CTRL_LANE0_SHIFT_LSB) |
|
||||
(palette_bits + 2) * ((1 << SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB) | (1 << SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB));
|
||||
interp0_hw->ctrl[1] = ctrl_lane_1;
|
||||
interp1_hw->ctrl[1] = ctrl_lane_1;
|
||||
|
||||
if (core) {
|
||||
tmds_palette_encode_loop_x(pixbuf, symbuf, n_pix);
|
||||
|
||||
interp0_hw->base[2] = (uint32_t)(tmds_palette + (2 << palette_bits));
|
||||
interp1_hw->base[2] = (uint32_t)(tmds_palette + (2 << palette_bits));
|
||||
tmds_palette_encode_loop_x(pixbuf, symbuf + (n_pix >> 1), n_pix);
|
||||
|
||||
interp0_hw->base[2] = (uint32_t)(tmds_palette + (4 << palette_bits));
|
||||
interp1_hw->base[2] = (uint32_t)(tmds_palette + (4 << palette_bits));
|
||||
tmds_palette_encode_loop_x(pixbuf, symbuf + n_pix, n_pix);
|
||||
} else {
|
||||
tmds_palette_encode_loop_y(pixbuf, symbuf, n_pix);
|
||||
|
||||
interp0_hw->base[2] = (uint32_t)(tmds_palette + (2 << palette_bits));
|
||||
interp1_hw->base[2] = (uint32_t)(tmds_palette + (2 << palette_bits));
|
||||
tmds_palette_encode_loop_y(pixbuf, symbuf + (n_pix >> 1), n_pix);
|
||||
|
||||
interp0_hw->base[2] = (uint32_t)(tmds_palette + (4 << palette_bits));
|
||||
interp1_hw->base[2] = (uint32_t)(tmds_palette + (4 << palette_bits));
|
||||
tmds_palette_encode_loop_y(pixbuf, symbuf + n_pix, n_pix);
|
||||
}
|
||||
|
||||
#if !TMDS_FULLRES_NO_INTERP_SAVE
|
||||
interp_restore(interp0_hw, &interp0_save);
|
||||
interp_restore(interp1_hw, &interp1_save);
|
||||
#endif
|
||||
}
|
||||
46
src/libdvi/tmds_encode.h
Normal file
46
src/libdvi/tmds_encode.h
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
#ifndef _TMDS_ENCODE_H_
|
||||
#define _TMDS_ENCODE_H_
|
||||
|
||||
#include "hardware/interp.h"
|
||||
#include "dvi_config_defs.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
// Functions from tmds_encode.c
|
||||
void tmds_encode_data_channel_16bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint channel_msb, uint channel_lsb);
|
||||
void tmds_encode_data_channel_8bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint channel_msb, uint channel_lsb);
|
||||
void tmds_encode_data_channel_fullres_16bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint channel_msb, uint channel_lsb);
|
||||
void tmds_setup_palette_symbols(const uint16_t *palette, uint32_t *symbuf, size_t n_palette);
|
||||
void tmds_setup_palette24_symbols(const uint32_t *palette, uint32_t *symbuf, size_t n_palette);
|
||||
void tmds_encode_palette_data(const uint32_t *pixbuf, const uint32_t *tmds_palette, uint32_t *symbuf, size_t n_pix, uint32_t palette_bits);
|
||||
|
||||
// Functions from tmds_encode.S
|
||||
|
||||
void tmds_encode_1bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix);
|
||||
void tmds_encode_2bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix);
|
||||
|
||||
// Uses interp0:
|
||||
void tmds_encode_loop_16bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix);
|
||||
void tmds_encode_loop_16bpp_leftshift(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint leftshift);
|
||||
|
||||
// Uses interp0 and interp1:
|
||||
void tmds_encode_loop_8bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix);
|
||||
void tmds_encode_loop_8bpp_leftshift(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint leftshift);
|
||||
|
||||
// Uses interp0 and interp1:
|
||||
// (Note a copy is provided in scratch memories X and Y)
|
||||
void tmds_fullres_encode_loop_16bpp_x(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix);
|
||||
void tmds_fullres_encode_loop_16bpp_y(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix);
|
||||
void tmds_fullres_encode_loop_16bpp_leftshift_x(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint leftshift);
|
||||
void tmds_fullres_encode_loop_16bpp_leftshift_y(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint leftshift);
|
||||
void tmds_palette_encode_loop_x(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix);
|
||||
void tmds_palette_encode_loop_y(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
46
src/libdvi/tmds_encode_1bpp.pio
Normal file
46
src/libdvi/tmds_encode_1bpp.pio
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
.program tmds_encode_1bpp
|
||||
|
||||
; 1bpp black/white pixels go in, TMDS symbols come out.
|
||||
; Each output word contains two output symbols, each 10 bits in size,
|
||||
; right-justified. The least-significant symbol is displayed first.
|
||||
;
|
||||
; We can encode using the following LUT: (yes this is compliant)
|
||||
;
|
||||
; x % 2 | colour | symbol
|
||||
; ------+--------+-------
|
||||
; 0 | 0 | 0x100
|
||||
; 0 | 1 | 0x200
|
||||
; 1 | 0 | 0x1ff
|
||||
; 1 | 1 | 0x2ff
|
||||
;
|
||||
; OSR: shift to right, autopull, threshold 32
|
||||
; ISR: shift to right, autopush, threshold 24
|
||||
;
|
||||
; Note the ISR needs to be shifted to *right* so that we can get the first
|
||||
; pixel in the less-significant position. Threshold 24 so we can get 8x 0-bits
|
||||
; at the LSBs for free :)
|
||||
|
||||
even_pixel:
|
||||
out x, 1
|
||||
mov y, ~x
|
||||
in y, 1
|
||||
in x, 1
|
||||
|
||||
odd_pixel:
|
||||
mov x, ~null
|
||||
in x, 8
|
||||
out x, 1
|
||||
mov y, ~x
|
||||
in y, 1
|
||||
in x, 13 ; Bring total shift to 24, triggering push.
|
||||
|
||||
% c-sdk {
|
||||
// Load the tmds_encode_1bpp program into `pio`, configure state machine `sm`
// with the shift settings the program header asks for, and enable it.
static inline void tmds_encode_1bpp_init(PIO pio, uint sm) {
	const uint prog_offset = pio_add_program(pio, &tmds_encode_1bpp_program);
	pio_sm_config cfg = tmds_encode_1bpp_program_get_default_config(prog_offset);

	// OSR: shift to right, autopull, threshold 32
	sm_config_set_out_shift(&cfg, true, true, 32);
	// ISR: shift to right, autopush, threshold 24
	sm_config_set_in_shift(&cfg, true, true, 24);

	pio_sm_init(pio, sm, prog_offset, &cfg);
	pio_sm_set_enabled(pio, sm, true);
}
|
||||
%}
|
||||
76
src/libdvi/tmds_table.h
Normal file
76
src/libdvi/tmds_table.h
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
// Generated from tmds_table_gen.py
|
||||
//
|
||||
// This table converts a 6 bit data input into a pair of TMDS data symbols
|
||||
// with data content *almost* equal (1 LSB off) to input value left shifted by
|
||||
// two. The pairs of symbols have a net DC balance of 0.
|
||||
//
|
||||
// The two symbols are concatenated in the 20 LSBs of a data word, with the
|
||||
// first symbol in least-significant position.
|
||||
//
|
||||
// Note the declaration isn't included here, just the table body. This is in
|
||||
// case you want multiple copies of the table in different SRAMs (particularly
|
||||
// scratch X/Y).
|
||||
0x7fd00u,
|
||||
0x40dfcu,
|
||||
0x41df8u,
|
||||
0x7ed04u,
|
||||
0x43df0u,
|
||||
0x7cd0cu,
|
||||
0x7dd08u,
|
||||
0x42df4u,
|
||||
0x47de0u,
|
||||
0x78d1cu,
|
||||
0x79d18u,
|
||||
0x46de4u,
|
||||
0x7bd10u,
|
||||
0x44decu,
|
||||
0x45de8u,
|
||||
0xafa41u,
|
||||
0x4fdc0u,
|
||||
0x70d3cu,
|
||||
0x71d38u,
|
||||
0x4edc4u,
|
||||
0x73d30u,
|
||||
0x4cdccu,
|
||||
0x4ddc8u,
|
||||
0xa7a61u,
|
||||
0x77d20u,
|
||||
0x48ddcu,
|
||||
0x49dd8u,
|
||||
0xa3a71u,
|
||||
0x4bdd0u,
|
||||
0xa1a79u,
|
||||
0xa0a7du,
|
||||
0x9fa81u,
|
||||
0x5fd80u,
|
||||
0x60d7cu,
|
||||
0x61d78u,
|
||||
0x5ed84u,
|
||||
0x63d70u,
|
||||
0x5cd8cu,
|
||||
0x5dd88u,
|
||||
0xb7a21u,
|
||||
0x67d60u,
|
||||
0x58d9cu,
|
||||
0x59d98u,
|
||||
0xb3a31u,
|
||||
0x5bd90u,
|
||||
0xb1a39u,
|
||||
0xb0a3du,
|
||||
0x8fac1u,
|
||||
0x6fd40u,
|
||||
0x50dbcu,
|
||||
0x51db8u,
|
||||
0xbba11u,
|
||||
0x53db0u,
|
||||
0xb9a19u,
|
||||
0xb8a1du,
|
||||
0x87ae1u,
|
||||
0x57da0u,
|
||||
0xbda09u,
|
||||
0xbca0du,
|
||||
0x83af1u,
|
||||
0xbea05u,
|
||||
0x81af9u,
|
||||
0x80afdu,
|
||||
0xbfa01u,
|
||||
139
src/libdvi/tmds_table_fullres.h
Normal file
139
src/libdvi/tmds_table_fullres.h
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
// Each entry consists of a 10 bit TMDS symbol in pseudo-differential format
|
||||
// (10 LSBs) and the symbol's disparity as a 6 bit signed integer (the 6
|
||||
// MSBs). There is a 16 bit gap in between them, which is actually vital for
|
||||
// the way the TMDS encode works!
|
||||
//
|
||||
// There are 128 1-word entries. The lookup index should be the concatenation
|
||||
// of the sign bit of current running disparity, with 6 bits of colour channel
|
||||
// data.
|
||||
|
||||
// Non-negative running disparity:
|
||||
0xe0000100,
|
||||
0xf8000303,
|
||||
0x00000307,
|
||||
0xe8000104,
|
||||
0x000001f0,
|
||||
0xf000010c,
|
||||
0xe8000108,
|
||||
0x0000030b,
|
||||
0xf80001e0,
|
||||
0xf800011c,
|
||||
0xf0000118,
|
||||
0x000001e4,
|
||||
0xe8000110,
|
||||
0x00000313,
|
||||
0x000001e8,
|
||||
0xf0000241,
|
||||
0xf00001c0,
|
||||
0x0000013c,
|
||||
0xf8000138,
|
||||
0xf80001c4,
|
||||
0xf0000130,
|
||||
0x000001cc,
|
||||
0xf80001c8,
|
||||
0xf8000261,
|
||||
0xe8000120,
|
||||
0x00000323,
|
||||
0x000001d8,
|
||||
0x00000271,
|
||||
0xf80001d0,
|
||||
0xf0000086,
|
||||
0xe8000082,
|
||||
0xf0000281,
|
||||
0xe8000180,
|
||||
0x00000383,
|
||||
0x00000178,
|
||||
0xf0000184,
|
||||
0xf8000170,
|
||||
0xf800018c,
|
||||
0xf0000188,
|
||||
0xf0000221,
|
||||
0xf0000160,
|
||||
0x0000019c,
|
||||
0xf8000198,
|
||||
0xf8000231,
|
||||
0xf0000190,
|
||||
0x00000239,
|
||||
0xf00000c2,
|
||||
0xf80002c1,
|
||||
0xe8000140,
|
||||
0x00000343,
|
||||
0x000001b8,
|
||||
0xf0000211,
|
||||
0xf80001b0,
|
||||
0xf8000219,
|
||||
0x0000021d,
|
||||
0x000002e1,
|
||||
0xf00001a0,
|
||||
0xf0000209,
|
||||
0xf800020d,
|
||||
0xf000000e,
|
||||
0xf0000205,
|
||||
0xe8000006,
|
||||
0xe0000002,
|
||||
0xe8000201,
|
||||
// Negative running disparity:
|
||||
0x280003ff,
|
||||
0x100001fc,
|
||||
0x080001f8,
|
||||
0x200003fb,
|
||||
0x000001f0,
|
||||
0x180003f3,
|
||||
0x200003f7,
|
||||
0x080001f4,
|
||||
0x1000031f,
|
||||
0x100003e3,
|
||||
0x180003e7,
|
||||
0x000001e4,
|
||||
0x200003ef,
|
||||
0x080001ec,
|
||||
0x000001e8,
|
||||
0x080000be,
|
||||
0x1800033f,
|
||||
0x0000013c,
|
||||
0x100003c7,
|
||||
0x1000033b,
|
||||
0x180003cf,
|
||||
0x000001cc,
|
||||
0x10000337,
|
||||
0x0000009e,
|
||||
0x200003df,
|
||||
0x080001dc,
|
||||
0x000001d8,
|
||||
0x00000271,
|
||||
0x1000032f,
|
||||
0x08000279,
|
||||
0x1000027d,
|
||||
0x0800007e,
|
||||
0x2000037f,
|
||||
0x0800017c,
|
||||
0x00000178,
|
||||
0x1800037b,
|
||||
0x1000038f,
|
||||
0x10000373,
|
||||
0x18000377,
|
||||
0x080000de,
|
||||
0x1800039f,
|
||||
0x0000019c,
|
||||
0x10000367,
|
||||
0x000000ce,
|
||||
0x1800036f,
|
||||
0x00000239,
|
||||
0x0800023d,
|
||||
0x0000003e,
|
||||
0x200003bf,
|
||||
0x080001bc,
|
||||
0x000001b8,
|
||||
0x080000ee,
|
||||
0x1000034f,
|
||||
0x000000e6,
|
||||
0x0000021d,
|
||||
0x000002e1,
|
||||
0x1800035f,
|
||||
0x080000f6,
|
||||
0x000000f2,
|
||||
0x080002f1,
|
||||
0x080000fa,
|
||||
0x100002f9,
|
||||
0x180002fd,
|
||||
0x100000fe,
|
||||
150
src/libdvi/tmds_table_gen.py
Executable file
150
src/libdvi/tmds_table_gen.py
Executable file
|
|
@ -0,0 +1,150 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# The key fact is that, if x is even, and the encoder currently has a running
|
||||
# imbalance of 0, encoding x followed by x + 1 produces a symbol pair with a
|
||||
# net balance of 0.
|
||||
#
|
||||
# This is a reasonable constraint, because we only want RGB565 (so 6 valid
|
||||
# channel data bits -> data is multiple of 4), and can probably tolerate
|
||||
# 0.25LSB of noise :)
|
||||
#
|
||||
# This means that encoding a half-horizontal-resolution scanline buffer is a
|
||||
# simple LUT operation for each colour channel, because we have made the
|
||||
# encoding process stateless by guaranteeing 0 balance.
|
||||
|
||||
def popcount(x):
    """Return the number of set bits in the non-negative integer ``x``.

    Raises:
        ValueError: if ``x`` is negative. Python integers are arbitrary
            precision, so a negative value has no finite count of set bits
            (the previous ``x & (x - 1)`` loop never terminated for one).
    """
    if x < 0:
        raise ValueError("popcount() requires a non-negative integer")
    # bin() yields e.g. '0b1011'; the '0b' prefix contains no '1' characters.
    return bin(x).count("1")
|
||||
|
||||
# Equivalent to N1(q) - N0(q) in the DVI spec
|
||||
def byteimbalance(x):
    """Return (number of 1 bits) - (number of 0 bits) for the byte ``x``.

    The zero count is taken as ``8 - popcount(x)``, so ``x`` is expected to
    fit in 8 bits.
    """
    ones = popcount(x)
    zeros = 8 - ones
    return ones - zeros
|
||||
|
||||
# This is a direct translation of "Figure 3-5. T.M.D.S. Encode Algorithm" on
|
||||
# page 29 of DVI 1.0 spec
|
||||
|
||||
class TMDSEncode:
    """Stateful TMDS encoder that tracks a running DC imbalance.

    ``imbalance`` is the running disparity carried from one data symbol to
    the next; it is reset to 0 whenever a control symbol is emitted.
    """

    # 10-bit control symbols, indexed by the 2-bit control value ``c``.
    ctrl_syms = {
        0b00: 0b1101010100,
        0b01: 0b0010101011,
        0b10: 0b0101010100,
        0b11: 0b1010101011,
    }

    def __init__(self):
        self.imbalance = 0

    def encode(self, d, c, de):
        """Encode one symbol and update the running imbalance.

        Args:
            d: data value; bits 0-7 feed the transition-minimising stage.
            c: control value, used as the key into ``ctrl_syms`` when ``de``
               is false.
            de: data enable. Falsy selects a control symbol and resets the
                imbalance; truthy encodes ``d``.

        Returns:
            The 10-bit output symbol as an integer.
        """
        if not de:
            self.imbalance = 0
            return self.ctrl_syms[c]

        # Stage 1: minimise transitions. Each new bit is the XOR (or XNOR) of
        # the previously produced bit and the next data bit.
        ones = popcount(d)
        use_xnor = ones > 4 or (ones == 4 and not d & 0x1)
        q_m = d & 0x1
        for bit in range(1, 8):
            step = ((q_m >> (bit - 1)) ^ (d >> bit)) & 0x1
            if use_xnor:
                step ^= 0x1
            q_m |= step << bit
        if not use_xnor:
            # Bit 8 records that the XOR form was used.
            q_m |= 0x100

        # Stage 2: correct DC balance. The mask inverts the 8 data bits and
        # sets bit 9 to flag the inversion.
        inversion_mask = 0x2ff
        xor_flag = q_m & 0x100
        byte_bal = byteimbalance(q_m & 0xff)

        if self.imbalance == 0 or byte_bal == 0:
            if xor_flag:
                q_out = q_m
                self.imbalance += byte_bal
            else:
                q_out = q_m ^ inversion_mask
                self.imbalance -= byte_bal
        elif (self.imbalance > 0) == (byte_bal > 0):
            # Running imbalance and this byte lean the same way: invert.
            q_out = q_m ^ inversion_mask
            self.imbalance += (2 if xor_flag else 0) - byte_bal
        else:
            q_out = q_m
            self.imbalance += byte_bal - (0 if xor_flag else 2)
        return q_out
|
||||
|
||||
# Turn a bitmap of width n into n pairs of pseudo-differential bits
|
||||
def differentialise(x, n):
    """Expand the low ``n`` bits of ``x`` into ``n`` two-bit pairs.

    Bits are consumed from bit ``n - 1`` down to bit 0. A set bit becomes the
    pair ``0b01`` and a clear bit becomes ``0b10``; pairs are packed with the
    first one consumed in the most significant position.
    """
    accum = 0
    for pos in reversed(range(n)):
        pair = 0b01 if (x >> pos) & 1 else 0b10
        accum = (accum << 2) | pair
    return accum
|
||||
|
||||
# Single encoder instance shared by the table generators below; its running
# imbalance carries over from one encode() call to the next.
enc = TMDSEncode()
|
||||
|
||||
|
||||
###
|
||||
# Pixel-doubled table:
|
||||
|
||||
# for i in range(0, 256, 4):
|
||||
# sym0 = enc.encode(i, 0, 1)
|
||||
# sym1 = enc.encode(i ^ 1, 0, 1)
|
||||
# assert(enc.imbalance == 0)
|
||||
# print(f"0x{sym0 | (sym1 << 10):05x}u,")
|
||||
|
||||
###
|
||||
# Fullres 1bpp table: (each entry is 2 words, 4 pixels)
|
||||
|
||||
# Note: the trick here is that encoding 0x00 or 0xff sets imbalance to -8, and
|
||||
# encoding 0x01 or 0xfe returns imbalance to 0, so we alternate between these
|
||||
# two pairs of dark/light colours. Creates some fairly subtle vertical
|
||||
# banding, but it's cheap.
|
||||
|
||||
# for i in range(1 << 4):
|
||||
# syms = list(enc.encode((0xff if i & 1 << j else 0) ^ j & 0x01, 0, 1) for j in range(4))
|
||||
# print(f"0x{syms[0] | syms[1] << 10:05x}, 0x{syms[2] | syms[3] << 10:05x}")
|
||||
# assert(enc.imbalance == 0)
|
||||
|
||||
###
|
||||
# Fullres table stuff:
|
||||
|
||||
# def disptable_format(sym):
|
||||
# return sym | ((popcount(sym) * 2 - 10 & 0x3f) << 26)
|
||||
|
||||
# print("// Non-negative running disparity:")
|
||||
# for i in range(0, 256, 4):
|
||||
# enc.imbalance = 1
|
||||
# print("0x{:08x},".format(disptable_format(enc.encode(i, 0, 1))))
|
||||
|
||||
# print("// Negative running disparity:")
|
||||
# for i in range(0, 256, 4):
|
||||
# enc.imbalance = -1
|
||||
# print("0x{:08x},".format(disptable_format(enc.encode(i, 0, 1))))
|
||||
|
||||
###
|
||||
# Control symbols:
|
||||
|
||||
# for i in range(4):
|
||||
# sym = enc.encode(0, i, 0)
|
||||
# print(f"0x{sym << 10 | sym:05x},")
|
||||
|
||||
|
||||
###
|
||||
# Find zero-balance symbols:
|
||||
|
||||
# for i in range(256):
|
||||
# enc.imbalance = 0
|
||||
# sym = enc.encode(i, 0, 1)
|
||||
# if enc.imbalance == 0:
|
||||
# print(f"{i:02x}: {sym:03x}")
|
||||
|
||||
###
|
||||
# Generate 2bpp table based on above experiment:
|
||||
|
||||
# Byte values used for the four 2bpp levels. An even-column value is always
# encoded first, followed by an odd-column value; the assert below checks
# that every such pair leaves the encoder's running imbalance at 0.
levels_2bpp_even = [0x05, 0x50, 0xaf, 0xfa]
levels_2bpp_odd = [0x04, 0x51, 0xae, 0xfb]

# Odd-pixel index varies slowest, even-pixel index fastest.
level_pairs = [
    (i0, p0, i1, p1)
    for i1, p1 in enumerate(levels_2bpp_odd)
    for i0, p0 in enumerate(levels_2bpp_even)
]

for i0, p0, i1, p1 in level_pairs:
    sym0 = enc.encode(p0, 0, 1)
    sym1 = enc.encode(p1, 0, 1)
    assert enc.imbalance == 0
    print(f".word 0x{sym1 << 10 | sym0:05x} // {i0:02b}, {i1:02b}")
|
||||
83
src/libdvi/util_queue_u32_inline.h
Normal file
83
src/libdvi/util_queue_u32_inline.h
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
#ifndef _UTIL_QUEUE_U32_INLINE_H
|
||||
#define _UTIL_QUEUE_U32_INLINE_H
|
||||
|
||||
// Faster versions of the functions found in pico/util/queue.h, for the common
|
||||
// case of 32-bit-sized elements. Can be used on the same queue data
|
||||
// structure, and mixed freely with the generic access methods, as long as
|
||||
// element_size == 4.
|
||||
|
||||
#include "pico/util/queue.h"
|
||||
#include "hardware/sync.h"
|
||||
|
||||
// Advance a ring index by one slot, wrapping to 0 once it passes
// q->element_count. The test is "greater than" (not "greater or equal")
// because the backing store holds element_count + 1 slots.
static inline uint16_t _queue_inc_index_u32(queue_t *q, uint16_t index) {
    const uint16_t next = (uint16_t)(index + 1);
    return (next > q->element_count) ? 0 : next;
}
|
||||
|
||||
// Non-blocking enqueue of one 32-bit word read from *data.
// Returns true if the word was stored, false if the queue level already
// equals q->element_count (queue full).
static inline bool queue_try_add_u32(queue_t *q, void *data) {
    uint32_t saved = spin_lock_blocking(q->core.spin_lock);
    const bool has_room = queue_get_level_unsafe(q) != q->element_count;
    if (has_room) {
        uint32_t *slots = (uint32_t *)q->data;
        slots[q->wptr] = *(uint32_t *)data;
        q->wptr = _queue_inc_index_u32(q, q->wptr);
    }
    spin_unlock(q->core.spin_lock, saved);
    // Signal an event only after the lock is released; the *_blocking_u32
    // helpers in this header wait in __wfe() between attempts.
    if (has_room) {
        __sev();
    }
    return has_room;
}
|
||||
|
||||
// Non-blocking dequeue of one 32-bit word into *data.
// Returns true if a word was removed, false if the queue level is 0
// (queue empty), in which case *data is left untouched.
static inline bool queue_try_remove_u32(queue_t *q, void *data) {
    uint32_t saved = spin_lock_blocking(q->core.spin_lock);
    const bool has_item = queue_get_level_unsafe(q) != 0;
    if (has_item) {
        const uint32_t *slots = (uint32_t *)q->data;
        *(uint32_t *)data = slots[q->rptr];
        q->rptr = _queue_inc_index_u32(q, q->rptr);
    }
    spin_unlock(q->core.spin_lock, saved);
    // Signal an event only after the lock is released; the *_blocking_u32
    // helpers in this header wait in __wfe() between attempts.
    if (has_item) {
        __sev();
    }
    return has_item;
}
|
||||
|
||||
// Non-blocking read of the word at the read index into *data, without
// advancing the read index. Returns true if a word was copied, false if the
// queue level is 0. No event is signalled, since the queue is not modified.
static inline bool queue_try_peek_u32(queue_t *q, void *data) {
    uint32_t saved = spin_lock_blocking(q->core.spin_lock);
    const bool has_item = queue_get_level_unsafe(q) != 0;
    if (has_item) {
        const uint32_t *slots = (uint32_t *)q->data;
        *(uint32_t *)data = slots[q->rptr];
    }
    spin_unlock(q->core.spin_lock, saved);
    return has_item;
}
|
||||
|
||||
// Enqueue one 32-bit word read from *data, retrying until
// queue_try_add_u32() succeeds. Waits in __wfe() between failed attempts.
static inline void queue_add_blocking_u32(queue_t *q, void *data) {
    while (!queue_try_add_u32(q, data)) {
        __wfe();
    }
}
|
||||
|
||||
// Dequeue one 32-bit word into *data, retrying until
// queue_try_remove_u32() succeeds. Waits in __wfe() between failed attempts.
static inline void queue_remove_blocking_u32(queue_t *q, void *data) {
    while (!queue_try_remove_u32(q, data)) {
        __wfe();
    }
}
|
||||
|
||||
// Copy the word at the read index into *data without removing it, retrying
// until queue_try_peek_u32() succeeds. Waits in __wfe() between failed
// attempts.
static inline void queue_peek_blocking_u32(queue_t *q, void *data) {
    while (!queue_try_peek_u32(q, data)) {
        __wfe();
    }
}
|
||||
|
||||
#endif
|
||||
Loading…
Reference in a new issue