diff --git a/CMakeLists.txt b/CMakeLists.txt index 7e3dae7..48a0266 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,8 +5,8 @@ if (NOT TARGET _pico_extras_inclusion_marker) # Pull in PICO SDK (must be before project) include(pico_sdk_import.cmake) - if (PICO_SDK_VERSION_STRING VERSION_LESS "1.2.0") - message(FATAL_ERROR "Require at least Raspberry Pi Pico SDK version 1.2.0") + if (PICO_SDK_VERSION_STRING VERSION_LESS "2.0.0") + message(FATAL_ERROR "Require at least Raspberry Pi Pico SDK version 2.0.0") endif() project(pico_extras C CXX) diff --git a/src/common/pico_audio/CMakeLists.txt b/src/common/pico_audio/CMakeLists.txt index 914d56c..a7cc15b 100644 --- a/src/common/pico_audio/CMakeLists.txt +++ b/src/common/pico_audio/CMakeLists.txt @@ -9,8 +9,12 @@ if (NOT TARGET pico_audio) target_sources(pico_audio INTERFACE ${CMAKE_CURRENT_LIST_DIR}/audio.cpp - $<$<NOT:$<BOOL:${PICO_NO_HARDWARE}>>:${CMAKE_CURRENT_LIST_DIR}/audio_utils.S> ) + if (NOT PICO_NO_HARDWARE AND NOT PICO_RISCV) + target_sources(pico_audio INTERFACE + ${CMAKE_CURRENT_LIST_DIR}/audio_utils.S + ) + endif() target_link_libraries(pico_audio INTERFACE pico_audio_headers pico_sync) endif() diff --git a/src/common/pico_scanvideo/include/pico/scanvideo/scanvideo_base.h b/src/common/pico_scanvideo/include/pico/scanvideo/scanvideo_base.h index 2002db6..645d0fd 100644 --- a/src/common/pico_scanvideo/include/pico/scanvideo/scanvideo_base.h +++ b/src/common/pico_scanvideo/include/pico/scanvideo/scanvideo_base.h @@ -287,6 +287,7 @@ extern const scanvideo_mode_t vga_mode_1024x768_60; extern const scanvideo_mode_t vga_mode_1280x1024_60; extern const scanvideo_mode_t vga_mode_720p_60; extern const scanvideo_mode_t vga_mode_1080p_60; +extern const scanvideo_mode_t vga_mode_1440p_60; extern const scanvideo_mode_t vga_mode_tft_800x480_50; extern const scanvideo_mode_t vga_mode_tft_400x240_50; diff --git a/src/common/pico_scanvideo/vga_modes.c b/src/common/pico_scanvideo/vga_modes.c index 62f9a15..8c4a333 100644 --- 
a/src/common/pico_scanvideo/vga_modes.c +++ b/src/common/pico_scanvideo/vga_modes.c @@ -610,3 +610,36 @@ const scanvideo_mode_t vga_mode_1280x1024_60 = .xscale = 1, .yscale = 1, }; + +const scanvideo_timing_t vga_timing_1920x1440_60_default = + { + .clock_freq = 234000000, + + .h_active = 1920, + .v_active = 1440, + + .h_front_porch = 128, + .h_pulse = 208, + .h_total = 2600, + .h_sync_polarity = 1, + + .v_front_porch = 1, + .v_pulse = 3, + .v_total = 1500, + .v_sync_polarity = 0, + + .enable_clock = 0, + .clock_polarity = 0, + + .enable_den = 0 + }; + +const scanvideo_mode_t vga_mode_1440p_60 = + { + .default_timing = &vga_timing_1920x1440_60_default, + .pio_program = &video_24mhz_composable, + .width = 1920, + .height = 1440, + .xscale = 1, + .yscale = 1, + }; \ No newline at end of file diff --git a/src/common/platypus/CMakeLists.txt b/src/common/platypus/CMakeLists.txt index 2a04d94..15ac4b1 100644 --- a/src/common/platypus/CMakeLists.txt +++ b/src/common/platypus/CMakeLists.txt @@ -2,7 +2,8 @@ add_library(platypus INTERFACE) target_sources(platypus INTERFACE ${CMAKE_CURRENT_LIST_DIR}/platypus.c - $<$:${CMAKE_CURRENT_LIST_DIR}/decompress_row.S> + $<$:${CMAKE_CURRENT_LIST_DIR}/decompress_row.S> + $<$:${CMAKE_CURRENT_LIST_DIR}/decompress_row_33.S> ) target_include_directories(platypus INTERFACE ${CMAKE_CURRENT_LIST_DIR}) diff --git a/src/common/platypus/decompress_row_33.S b/src/common/platypus/decompress_row_33.S new file mode 100644 index 0000000..07ce1a0 --- /dev/null +++ b/src/common/platypus/decompress_row_33.S @@ -0,0 +1,468 @@ +#include "hardware/regs/addressmap.h" +.syntax unified +.cpu cortex-m33 +.thumb + +#ifndef VIDEO_DBI +#error only VIDEO_DBI supported +#endif +#define FIRST_TEST_SHIFT 6 +#define SECOND_TEST_SHIFT 11 + +// For PLATYPUS_GATED_EOL_CHECK, we remove most checks for running off the end of the output buffer when +// the next input is not word aligned. 
+// This sounds bad, however, we actually inject a new check in the slow/least-compressed 7 byte path +// before reading the data. +// The upshot is that we can avoid the checks as mentioned above, as long as we append 0xa0 bytes +// up to a word boundary beyond the data, to force a read off the end of the compressed data into +// the 7-byte path (note 0xa0 covers both DBI and non-DBI mode for historical reasons) +#if PLATYPUS_GATED_EOL_CHECK +#define GATE_ALIGN_1_CHECK (FIRST_TEST_SHIFT<8) +#define GATE_ALIGN_2_CHECK (FIRST_TEST_SHIFT<16) +#define GATE_ALIGN_3_CHECK (FIRST_TEST_SHIFT<24) +#endif + +#define r_output r0 +#define r_data r1 +#define r_input r2 +#define r_top r3 +#define r_bottom r4 +#define r_tmp3 r5 +#define r_tmp2 r6 +#define r_tmp1 r7 +#define r_output_end r8 +#define r_5_tables r9 +#define r_222_table r10 +#define r_mask r11 +// for now +#define r_tmp_hi1 r_tmp3 +#define r_row_delta r14 + +.macro align_fall_thru +.p2align 2 +.endm + +.macro debuggo val +#if 0 +push {r0-r3, ip, lr} +mov r0, r_input +mov r1, \val +mov r2, r_top +mov lr, ip +bl debuggo_print +pop {r0-r3, ip, lr} +#endif +.endm + +.macro lo_to_hi_lo dest, src +// src -- -- AB CD +// dest AB CD AB CD + pkhbt \dest, \src, \src, lsl #16 +.endm + +.macro do_222 + ubfx r_tmp_hi1, r_tmp1, #12, #6 + add r_tmp_hi1, r_222_table, r_tmp_hi1, lsl #3 + ldrd r_tmp1, r_tmp2, [r_tmp_hi1] + add r_bottom, r_top, r_tmp2 + add r_top, r_tmp1 +.endm + +.macro do_555 + ubfx r_tmp_hi1, r_tmp1, #11, #5 // red idx + add r_tmp2, r_5_tables, r_tmp_hi1, LSL #3 + ldrd r_tmp2, r_tmp_hi1, [r_tmp2, #0] // red values + add r_bottom, r_top, r_tmp_hi1 + add r_top, r_tmp2 + + ubfx r_tmp_hi1, r_tmp1, #16, #5 // green idx + add r_tmp2, r_5_tables, r_tmp_hi1, LSL #3 + ldrd r_tmp2, r_tmp_hi1, [r_tmp2, #256] // green values + add r_top, r_tmp2 + add r_bottom, r_tmp_hi1 + + ubfx r_tmp_hi1, r_tmp1, #21, #5 // blue idx + add r_tmp2, r_5_tables, r_tmp_hi1, LSL #3 + ldrd r_tmp2, r_tmp_hi1, [r_tmp2, #512] // blue values + add 
r_top, r_tmp2 + add r_bottom, r_tmp_hi1 +.endm + +// inputs, are top and data +.macro shuffle_7_bytes_to_8 top_from, bottom_from tmp_a tmp_b +//#define SHRINKO 1 // todo still need to figure out why this makes things worse - ok actually don't think it makes things much worse, the problem seems LSL/LSR shifts here take it back to 2 cycles anyway + +#if 0 + ldr \tmp_b, =#0xff210821 + lsls \tmp_a, \tmp_b, #16 + ands \tmp_b, \bottom_from + ands \tmp_a, \top_from +#else +#if SHRINKO + ands \tmp_b, r_mask, \bottom_from + ands \tmp_a, \top_from, r_mask, LSL #16 +#else + lsls \tmp_a, r_mask, #16 + ands \tmp_b, r_mask, \bottom_from + ands \tmp_a, \top_from +#endif +#endif + + eors r_bottom, \bottom_from, \tmp_b + eors r_top, \top_from, \tmp_a + + // todo can we shave 1 more cycles to make cycles in the DBI code? a challenge to anyone who reads this! +#if SHRINKO + .p2align 2 + orrs \tmp_b, \tmp_b, \tmp_a, LSR #13 +#else + lsrs \tmp_a, #13 + orrs \tmp_b, \tmp_a +#endif + +#if SHRINKO + eors \tmp_b, \tmp_b, \tmp_b, LSR #10 +#else + lsrs \tmp_a, \tmp_b, #10 + eors \tmp_b, \tmp_a +#endif + + lsrs \tmp_a, \tmp_b, #12 + adcs \tmp_b, \tmp_b + +#if SHRINKO + orrs r_bottom, r_bottom, \tmp_b, LSL #24 +#else + lsls \tmp_b, #24 + orrs r_bottom, \tmp_b +#endif +.endm + +.macro write_output + str r_bottom, [r_output, r_row_delta] + stmia r_output!, {r_top} +.endm + + +#if PLATYPUS_TABLES_MAIN_RAM +.section .data +.global shared_5_table +shared_5_table: +#else +.macro decompressor name data_section_prefix code_section_prefix +.section \data_section_prefix\().\name\().data +.global \name\()_5_table +\name\()_5_table: +#endif + +#define Q 0 +.word 0x00000000 << Q, 0x00000000 << Q, 0x00000000 << Q, 0x00000001 << Q, 0x00010000 << Q, 0x00010001 << Q, 0x00010000 << Q, 0x00000001 << Q +.word 0x00010001 << Q, 0x00000001 << Q, 0x00010000 << Q, 0x00000000 << Q, 0x00000000 << Q, 0x00010001 << Q, 0x00010000 << Q, 0x00010002 << Q +.word 0x00000000 << Q, 0x00010002 << Q, 0x00020000 << Q, 0x00010001 << 
Q, 0x00010000 << Q, 0x00020002 << Q, 0x00010000 << Q, 0x00010000 << Q +.word 0x00000001 << Q, 0x00000001 << Q, 0x00010001 << Q, 0x00000002 << Q, 0x00010001 << Q, 0x00000000 << Q, 0x00020001 << Q, 0x00000001 << Q +.word 0x00020000 << Q, 0x00020001 << Q, 0x00000001 << Q, 0x00000002 << Q, 0x00020002 << Q, 0x00000001 << Q, 0x00020001 << Q, 0x00000000 << Q +.word 0x00000000 << Q, 0x00000002 << Q, 0x00000001 << Q, 0x00010002 << Q, 0x00010000 << Q, 0x00020001 << Q, 0x00020000 << Q, 0x00010000 << Q +.word 0x00020000 << Q, 0x00020002 << Q, 0x00000000 << Q, 0x00020002 << Q, 0x00010002 << Q, 0x00000002 << Q, 0x00010000 << Q, 0x00020003 << Q +.word 0x00000000 << Q, 0x00020003 << Q, 0x00010000 << Q, 0x00030003 << Q, 0x00010000 << Q, 0x00000002 << Q, 0x00000001 << Q, 0x00010001 << Q + +#undef Q +#define Q 6 +.word 0x00000000 << Q, 0x00000000 << Q, 0x00000000 << Q, 0x00000001 << Q, 0x00010000 << Q, 0x00010001 << Q, 0x00010000 << Q, 0x00000001 << Q +.word 0x00010001 << Q, 0x00000001 << Q, 0x00010000 << Q, 0x00000000 << Q, 0x00000000 << Q, 0x00010001 << Q, 0x00010000 << Q, 0x00010002 << Q +.word 0x00000000 << Q, 0x00010002 << Q, 0x00020000 << Q, 0x00010001 << Q, 0x00010000 << Q, 0x00020002 << Q, 0x00010000 << Q, 0x00010000 << Q +.word 0x00000001 << Q, 0x00000001 << Q, 0x00010001 << Q, 0x00000002 << Q, 0x00010001 << Q, 0x00000000 << Q, 0x00020001 << Q, 0x00000001 << Q +.word 0x00020000 << Q, 0x00020001 << Q, 0x00000001 << Q, 0x00000002 << Q, 0x00020002 << Q, 0x00000001 << Q, 0x00020001 << Q, 0x00000000 << Q +.word 0x00000000 << Q, 0x00000002 << Q, 0x00000001 << Q, 0x00010002 << Q, 0x00010000 << Q, 0x00020001 << Q, 0x00020000 << Q, 0x00010000 << Q +.word 0x00020000 << Q, 0x00020002 << Q, 0x00000000 << Q, 0x00020002 << Q, 0x00010002 << Q, 0x00000002 << Q, 0x00010000 << Q, 0x00020003 << Q +.word 0x00000000 << Q, 0x00020003 << Q, 0x00010000 << Q, 0x00030003 << Q, 0x00010000 << Q, 0x00000002 << Q, 0x00000001 << Q, 0x00010001 << Q + +#undef Q +#define Q 11 +.word 0x00000000 << Q, 
0x00000000 << Q, 0x00000000 << Q, 0x00000001 << Q, 0x00010000 << Q, 0x00010001 << Q, 0x00010000 << Q, 0x00000001 << Q +.word 0x00010001 << Q, 0x00000001 << Q, 0x00010000 << Q, 0x00000000 << Q, 0x00000000 << Q, 0x00010001 << Q, 0x00010000 << Q, 0x00010002 << Q +.word 0x00000000 << Q, 0x00010002 << Q, 0x00020000 << Q, 0x00010001 << Q, 0x00010000 << Q, 0x00020002 << Q, 0x00010000 << Q, 0x00010000 << Q +.word 0x00000001 << Q, 0x00000001 << Q, 0x00010001 << Q, 0x00000002 << Q, 0x00010001 << Q, 0x00000000 << Q, 0x00020001 << Q, 0x00000001 << Q +.word 0x00020000 << Q, 0x00020001 << Q, 0x00000001 << Q, 0x00000002 << Q, 0x00020002 << Q, 0x00000001 << Q, 0x00020001 << Q, 0x00000000 << Q +.word 0x00000000 << Q, 0x00000002 << Q, 0x00000001 << Q, 0x00010002 << Q, 0x00010000 << Q, 0x00020001 << Q, 0x00020000 << Q, 0x00010000 << Q +.word 0x00020000 << Q, 0x00020002 << Q, 0x00000000 << Q, 0x00020002 << Q, 0x00010002 << Q, 0x00000002 << Q, 0x00010000 << Q, 0x00020003 << Q +.word 0x00000000 << Q, 0x00020003 << Q, 0x00010000 << Q, 0x00030003 << Q, 0x00010000 << Q, 0x00000002 << Q, 0x00000001 << Q, 0x00010001 << Q + +#if PLATYPUS_TABLES_MAIN_RAM +.global shared_222_table +shared_222_table: +#else +.global \name\()_222_table +\name\()_222_table: +#endif +.word 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00010000, 0x00010001, 0x00010000, 0x00000001 +.word 0x00000000, 0x00000040, 0x00000000, 0x00000041, 0x00010000, 0x00010041, 0x00010000, 0x00000041 +.word 0x00400000, 0x00400040, 0x00400000, 0x00400041, 0x00410000, 0x00410041, 0x00410000, 0x00400041 +.word 0x00400000, 0x00000040, 0x00400000, 0x00000041, 0x00410000, 0x00010041, 0x00410000, 0x00000041 +.word 0x00000000, 0x00000800, 0x00000000, 0x00000801, 0x00010000, 0x00010801, 0x00010000, 0x00000801 +.word 0x00000000, 0x00000840, 0x00000000, 0x00000841, 0x00010000, 0x00010841, 0x00010000, 0x00000841 +.word 0x00400000, 0x00400840, 0x00400000, 0x00400841, 0x00410000, 0x00410841, 0x00410000, 0x00400841 +.word 0x00400000, 0x00000840, 
0x00400000, 0x00000841, 0x00410000, 0x00010841, 0x00410000, 0x00000841 +.word 0x08000000, 0x08000800, 0x08000000, 0x08000801, 0x08010000, 0x08010801, 0x08010000, 0x08000801 +.word 0x08000000, 0x08000840, 0x08000000, 0x08000841, 0x08010000, 0x08010841, 0x08010000, 0x08000841 +.word 0x08400000, 0x08400840, 0x08400000, 0x08400841, 0x08410000, 0x08410841, 0x08410000, 0x08400841 +.word 0x08400000, 0x08000840, 0x08400000, 0x08000841, 0x08410000, 0x08010841, 0x08410000, 0x08000841 +.word 0x08000000, 0x00000800, 0x08000000, 0x00000801, 0x08010000, 0x00010801, 0x08010000, 0x00000801 +.word 0x08000000, 0x00000840, 0x08000000, 0x00000841, 0x08010000, 0x00010841, 0x08010000, 0x00000841 +.word 0x08400000, 0x00400840, 0x08400000, 0x00400841, 0x08410000, 0x00410841, 0x08410000, 0x00400841 +.word 0x08400000, 0x00000840, 0x08400000, 0x00000841, 0x08410000, 0x00010841, 0x08410000, 0x00000841 + +#if PLATYPUS_TABLES_MAIN_RAM +.macro decompressor name data_section_prefix code_section_prefix +#endif +.section \code_section_prefix\().\name\().code, "ax" + +.global \name +.type \name,%function +.thumb_func +//const uint8_t* \name(uint32_t *d0, uint32_t *d1, const uint8_t *s, uint32_t w); +\name: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +#if PLATYPUS_TABLES_MAIN_RAM + ldr r_5_tables, =shared_5_table + ldr r_222_table, =shared_222_table +#else + ldr r_5_tables, =\name\()_5_table + ldr r_222_table, =\name\()_222_table +#endif + ldr r_mask, =#0xff210821 + sub r_row_delta, r1, r_output + add r_output_end, r0, r3, LSL #1 + +\name\()_rem_0: + // r_data: X X X X + ldmia r_input!, {r_data} +align_fall_thru +\name\()_rem_4: + // r_data: D C B A + lsrs r_tmp1, r_data, #FIRST_TEST_SHIFT + bcs 2f + debuggo 4 + lo_to_hi_lo r_top, r_data + lsrs r_tmp2, r_tmp1, #SECOND_TEST_SHIFT + bcc 1f + do_555 + write_output + cmp r_output, r_output_end + blt \name\()_rem_0 + b \name\()_done +.p2align 2 +1: + do_222 + write_output +#if GATE_ALIGN_1_CHECK + b \name\()_rem_1 +#else + cmp r_output, r_output_end + 
blt \name\()_rem_1 + b \name\()_done +#endif + +.p2align 2 +2: + // r_data: D C B A + debuggo 4 + mov r_top, r_data + ldmia r_input!, {r_data} + // r_top: D C B A + // r_data: H G F E + shuffle_7_bytes_to_8 r_top, r_data, r_tmp1, r_tmp2 + write_output +#if !GATE_ALIGN_1_CHECK + cmp r_output, r_output_end + bge \name\()_done +#endif + + // fall thru +align_fall_thru +\name\()_rem_1: + // r_data : A X X X + debuggo 1 + lsrs r_tmp1, r_data, #24 + // r_tmp1 : 0 0 0 A + ldmia r_input!, {r_data} + // r_data : E D C B + lsls r_tmp2, r_data, #8 + orrs r_tmp2, r_tmp1 + // r_tmp2 : D C B A + lsrs r_tmp1, r_tmp2, #FIRST_TEST_SHIFT + bcs 2f + lo_to_hi_lo r_top, r_tmp2 + lsrs r_tmp2, r_tmp1, #SECOND_TEST_SHIFT + bcc 1f + do_555 + write_output +#if GATE_ALIGN_1_CHECK + b \name\()_rem_1 +#else + cmp r_output, r_output_end + blt \name\()_rem_1 + b \name\()_done +#endif +.p2align 2 +1: + do_222 + write_output +#if GATE_ALIGN_2_CHECK + b \name\()_rem_2 +#else + cmp r_output, r_output_end + blt \name\()_rem_2 + b \name\()_done +#endif +.p2align 2 +2: +#if GATE_ALIGN_1_CHECK + cmp r_output, r_output_end + bge \name\()_done +#endif + + // r_data : E D C B + // r_tmp2 : D C B A + lsrs r_bottom, r_data, #24 + // r_bottom : 0 0 0 E + ldmia r_input!, {r_data} + // r_data : I H G F + lsls r_tmp1, r_data, #8 + orrs r_bottom, r_tmp1 + // r_bottom : H G F E + + shuffle_7_bytes_to_8 r_tmp2, r_bottom, r_tmp1, r_tmp3 + write_output + +#if !GATE_ALIGN_2_CHECK + cmp r_output, r_output_end + bge \name\()_done +#endif + + // fall thru +align_fall_thru +\name\()_rem_2: + // r_data : B A 0 0 + debuggo 2 + lsrs r_tmp1, r_data, #16 + ldmia r_input!, {r_data} + // r_data : F E D C + lsls r_tmp2, r_data, #16 + orrs r_tmp2, r_tmp1 + // r_tmp2 : D C B A + + lsrs r_tmp1, r_tmp2, #FIRST_TEST_SHIFT + bcs 2f + lo_to_hi_lo r_top, r_tmp2 + lsrs r_tmp2, r_tmp1, #SECOND_TEST_SHIFT + bcc 1f + do_555 + write_output +#if GATE_ALIGN_2_CHECK + b \name\()_rem_2 +#else + cmp r_output, r_output_end + blt \name\()_rem_2 
+#endif +\name\()_done: +\name\()_done2: + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +.p2align 2 +1: + do_222 + write_output +#if GATE_ALIGN_3_CHECK + b \name\()_rem_3 +#else + cmp r_output, r_output_end + blt \name\()_rem_3 + b \name\()_done +#endif +.p2align 2 +2: +#if GATE_ALIGN_2_CHECK + cmp r_output, r_output_end + bge \name\()_done +#endif + + // r_data : F E D C + // r_tmp2 : D C B A + + lsrs r_bottom, r_data, #16 + // r_bottom: 0 0 F E + ldmia r_input!, {r_data} + // r_data : J I H G + lsls r_tmp1, r_data, #16 + orrs r_bottom, r_tmp1 + // r_bottom : H G F E + + shuffle_7_bytes_to_8 r_tmp2, r_bottom, r_tmp1, r_tmp3 + write_output +#if !GATE_ALIGN_3_CHECK + cmp r_output, r_output_end + bge \name\()_done +#endif + +// fall thru +align_fall_thru +\name\()_rem_3: // r_remaining_bits has 3 bytes remaining (in the MSB) + // r_data : C B A 0 + debuggo 3 + lsrs r_tmp1, r_data, #8 + ldmia r_input!, {r_data} + // r_data : G F E D + lsls r_tmp2, r_data, #24 + orrs r_tmp2, r_tmp1 + // r_tmp2 : D C B A + + lsrs r_tmp1, r_tmp2, #FIRST_TEST_SHIFT + bcs 2f + + lo_to_hi_lo r_top, r_tmp2 + lsrs r_tmp2, r_tmp1, #SECOND_TEST_SHIFT + bcc 1f + do_555 + write_output +#if GATE_ALIGN_3_CHECK + b \name\()_rem_3 +#else + cmp r_output, r_output_end + blt \name\()_rem_3 + b \name\()_done +#endif +.p2align 2 +1: + do_222 + write_output + cmp r_output, r_output_end + blt \name\()_rem_4 + b \name\()_done2 +.p2align 2 +2: +#if GATE_ALIGN_3_CHECK + cmp r_output, r_output_end + bge \name\()_done +#endif + + // r_data : G F E D + // r_tmp2 : D C B A + + lsrs r_bottom, r_data, #8 + // r_bottom : 0 G F E + ldmia r_input!, {r_data} + // r_data : K J I H + lsls r_tmp1, r_data, #24 + orrs r_bottom, r_tmp1 + // r_bottom : H G F E + + shuffle_7_bytes_to_8 r_tmp2, r_bottom, r_tmp1, r_tmp3 + write_output + + cmp r_output, r_output_end + blt \name\()_rem_4 + b \name\()_done2 +.endm + +// put one decompressor in each scratch bank for use by each core +decompressor platypus_decompress_row_asm_a 
.scratch_x, .scratch_x +decompressor platypus_decompress_row_asm_b .scratch_y, .scratch_y \ No newline at end of file diff --git a/src/common/platypus/platypus.c b/src/common/platypus/platypus.c index 8104205..cdedbac 100644 --- a/src/common/platypus/platypus.c +++ b/src/common/platypus/platypus.c @@ -185,6 +185,14 @@ const uint32_t* platypus_decompress_row(uint32_t *d0, uint32_t *d1, const uint32 #else void platypus_decompress_configure_interp(bool is_b) { +#if PLATYPUS_TABLES_MAIN_RAM + extern uint32_t shared_222_table, shared_5_table; + uint32_t row_5 = (uintptr_t)&shared_5_table; + interp0->base[0] = row_5; + interp0->base[1] = row_5; + interp1->base[0] = row_5; + interp1->base[1] = (uintptr_t)&shared_222_table; +#else extern uint32_t platypus_decompress_row_asm_a_222_table, platypus_decompress_row_asm_a_5_table; extern uint32_t platypus_decompress_row_asm_b_222_table, platypus_decompress_row_asm_b_5_table; uint32_t row_5 = (uintptr_t)(is_b?&platypus_decompress_row_asm_b_5_table:&platypus_decompress_row_asm_a_5_table); @@ -192,6 +200,7 @@ void platypus_decompress_configure_interp(bool is_b) { interp0->base[1] = row_5; interp1->base[0] = row_5; interp1->base[1] = (uintptr_t)(is_b?&platypus_decompress_row_asm_b_222_table:&platypus_decompress_row_asm_a_222_table); +#endif #ifndef VIDEO_DBI const uint es_555 = 0; const uint es_222 = 0; diff --git a/src/rp2_common/pico_scanvideo_dpi/scanvideo.c b/src/rp2_common/pico_scanvideo_dpi/scanvideo.c index dfc5f65..9f3836d 100644 --- a/src/rp2_common/pico_scanvideo_dpi/scanvideo.c +++ b/src/rp2_common/pico_scanvideo_dpi/scanvideo.c @@ -568,15 +568,19 @@ static inline void abort_all_dma_channels_assuming_no_irq_preemption() { // work around it in software, but we want to suppress the IRQ afterwards anyway, so // as long as the spurious IRQ doesn't get taken here, then the h/w issue is of no problem dma_hw->abort = PICO_SCANVIDEO_SCANLINE_DMA_CHANNELS_MASK; - // note that relying on the abort bits is no longer safe, as it may 
get cleared before the spurious IRQ happens - // // wait for abort(s) to complete - // while (dma_hw->abort & PICO_SCANVIDEO_SCANLINE_DMA_CHANNELS_MASK) tight_loop_contents(); + // note that relying on the abort bits is not safe on RP2040, as it may get cleared before the spurious IRQ happens + // wait for abort(s) to complete +#if !PICO_RP2040 + // fixed after RP2040 + while (dma_hw->abort & PICO_SCANVIDEO_SCANLINE_DMA_CHANNELS_MASK) tight_loop_contents(); +#else while (dma_channel_is_busy(PICO_SCANVIDEO_SCANLINE_DMA_CHANNEL)) tight_loop_contents(); #if PICO_SCANVIDEO_PLANE_COUNT > 1 while (dma_channel_is_busy(PICO_SCANVIDEO_SCANLINE_DMA_CHANNEL2)) tight_loop_contents(); #if PICO_SCANVIDEO_PLANE_COUNT > 2 while (dma_channel_is_busy(PICO_SCANVIDEO_SCANLINE_DMA_CHANNEL3)) tight_loop_contents(); #endif +#endif #endif // we don't want any pending completion IRQ which may have happened in the interim dma_hw->ints0 = PICO_SCANVIDEO_SCANLINE_DMA_CHANNELS_MASK; @@ -740,6 +744,7 @@ void __video_most_time_critical_func(prepare_for_active_scanline_irqs_enabled)() pio_sm_exec(video_pio, PICO_SCANVIDEO_SCANLINE_SM, pio_encode_out(pio_null, 32)); } if (video_pio->sm[PICO_SCANVIDEO_SCANLINE_SM].instr != PIO_WAIT_IRQ4) { + // we don't know where we were, so we should also make sure OSR is empty, we certainly haven't sent any data yet // hmm the problem here is we don't know if we should wait or not, because that is purely based on timing.. // - if irq not posted, and we wait: GOOD // - if irq not posted and we don't wait: BAD. early line diff --git a/src/rp2_common/pico_sleep/sleep.c b/src/rp2_common/pico_sleep/sleep.c index 0aab405..8de37fb 100644 --- a/src/rp2_common/pico_sleep/sleep.c +++ b/src/rp2_common/pico_sleep/sleep.c @@ -43,7 +43,7 @@ void sleep_run_from_dormant_source(dormant_source_t dormant_source) { _dormant_source = dormant_source; // FIXME: Just defining average rosc freq here. - uint src_hz = (dormant_source == DORMANT_SOURCE_XOSC) ? 
XOSC_MHZ * MHZ : 6.5 * MHZ; + uint src_hz = (dormant_source == DORMANT_SOURCE_XOSC) ? XOSC_HZ : 6.5 * MHZ; uint clk_ref_src = (dormant_source == DORMANT_SOURCE_XOSC) ? CLOCKS_CLK_REF_CTRL_SRC_VALUE_XOSC_CLKSRC : CLOCKS_CLK_REF_CTRL_SRC_VALUE_ROSC_CLKSRC_PH;