From c340596580dfa164540a0805b3196d806c8caa9a Mon Sep 17 00:00:00 2001 From: Scott Shawcroft Date: Fri, 11 Apr 2025 16:20:12 -0700 Subject: [PATCH] Add selective collect to memory allocations By selectively collecting an allocation, we can skip scanning many allocations for pointers because we know up front they won't have them. This helps a ton when large buffers are being used and memory is slow (PSRAM). In one Fruit Jam example GC times drop from 80+ms to ~25ms. The example uses a number of bitmaps that are now no longer scanned. --- extmod/modasyncio.c | 3 +- extmod/vfs.c | 3 +- .../boards/adafruit_fruit_jam/board.c | 4 +- ports/unix/alloc.c | 3 +- ports/unix/mpconfigport.h | 1 + py/bc.h | 4 +- py/circuitpy_mpconfig.h | 1 + py/compile.c | 5 +- py/emitbc.c | 2 +- py/emitglue.c | 3 +- py/gc.c | 243 ++++++++++++------ py/gc.h | 3 + py/lexer.c | 2 +- py/malloc.c | 87 ++++--- py/map.c | 11 +- py/misc.h | 32 ++- py/mpstate.h | 3 + py/obj.c | 5 +- py/objarray.c | 16 +- py/objclosure.c | 2 +- py/objdeque.c | 2 +- py/objdict.c | 3 +- py/objexcept.c | 8 +- py/objfloat.c | 5 +- py/objlist.c | 6 +- py/objmap.c | 3 +- py/objmodule.c | 4 +- py/objproperty.c | 9 +- py/objtuple.c | 2 +- py/objtype.c | 15 +- py/parse.c | 4 +- py/persistentcode.c | 3 +- py/pystack.h | 3 +- py/qstr.c | 3 +- py/runtime.c | 6 +- py/scope.c | 2 +- shared-bindings/displayio/Group.c | 6 +- shared-module/fontio/BuiltinFont.c | 3 +- 38 files changed, 335 insertions(+), 185 deletions(-) diff --git a/extmod/modasyncio.c b/extmod/modasyncio.c index b0af32f70f..f17f0afac3 100644 --- a/extmod/modasyncio.c +++ b/extmod/modasyncio.c @@ -179,7 +179,8 @@ mp_obj_t mp_asyncio_context = MP_OBJ_NULL; static mp_obj_t task_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *args) { mp_arg_check_num(n_args, n_kw, 1, 2, false); - mp_obj_task_t *self = m_new_obj(mp_obj_task_t); + // CIRCUITPY-CHANGE: Task holds onto core and data so collect it. 
+ mp_obj_task_t *self = m_malloc_with_collect(sizeof(mp_obj_task_t)); self->pairheap.base.type = type; mp_pairheap_init_node(task_lt, &self->pairheap); self->coro = args[0]; diff --git a/extmod/vfs.c b/extmod/vfs.c index 4deb8a4428..1025a4402c 100644 --- a/extmod/vfs.c +++ b/extmod/vfs.c @@ -237,7 +237,8 @@ mp_obj_t mp_vfs_mount(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args } // create new object - mp_vfs_mount_t *vfs = m_new_obj(mp_vfs_mount_t); + // CIRCUITPY-CHANGE: Collect the mount object because it references others + mp_vfs_mount_t *vfs = m_malloc_with_collect(sizeof(mp_vfs_mount_t)); vfs->str = mnt_str; vfs->len = mnt_len; vfs->obj = vfs_obj; diff --git a/ports/raspberrypi/boards/adafruit_fruit_jam/board.c b/ports/raspberrypi/boards/adafruit_fruit_jam/board.c index 38fbde53f6..a868bc02b7 100644 --- a/ports/raspberrypi/boards/adafruit_fruit_jam/board.c +++ b/ports/raspberrypi/boards/adafruit_fruit_jam/board.c @@ -16,8 +16,8 @@ #define I2S_RESET_PIN_NUMBER 22 -#if defined(DEFAULT_USB_HOST_5V_POWER) bool board_reset_pin_number(uint8_t pin_number) { + #if defined(DEFAULT_USB_HOST_5V_POWER) if (pin_number == DEFAULT_USB_HOST_5V_POWER->number) { // doing this (rather than gpio_init) in this specific order ensures no // glitch if pin was already configured as a high output. gpio_init() temporarily @@ -29,6 +29,7 @@ bool board_reset_pin_number(uint8_t pin_number) { return true; } + #endif // Set I2S out of reset. if (pin_number == I2S_RESET_PIN_NUMBER) { gpio_put(pin_number, 1); @@ -39,7 +40,6 @@ bool board_reset_pin_number(uint8_t pin_number) { } return false; } -#endif void board_init(void) { // Reset the DAC to put it in a known state. 
diff --git a/ports/unix/alloc.c b/ports/unix/alloc.c index e9cf521583..a230e0c85b 100644 --- a/ports/unix/alloc.c +++ b/ports/unix/alloc.c @@ -58,7 +58,8 @@ void mp_unix_alloc_exec(size_t min_size, void **ptr, size_t *size) { } // add new link to the list of mmap'd regions - mmap_region_t *rg = m_new_obj(mmap_region_t); + // CIRCUITPY-CHANGE: Collect the mmap region because it points to others. + mmap_region_t *rg = m_malloc_with_collect(sizeof(mmap_region_t)); rg->ptr = *ptr; rg->len = min_size; rg->next = MP_STATE_VM(mmap_region_head); diff --git a/ports/unix/mpconfigport.h b/ports/unix/mpconfigport.h index afc1c3a38d..2d2943fed9 100644 --- a/ports/unix/mpconfigport.h +++ b/ports/unix/mpconfigport.h @@ -112,6 +112,7 @@ typedef long mp_off_t; // Always enable GC. #define MICROPY_ENABLE_GC (1) +#define MICROPY_ENABLE_SELECTIVE_COLLECT (1) #if !(defined(MICROPY_GCREGS_SETJMP) || defined(__x86_64__) || defined(__i386__) || defined(__thumb2__) || defined(__thumb__) || defined(__arm__)) // Fall back to setjmp() implementation for discovery of GC pointers in registers. 
diff --git a/py/bc.h b/py/bc.h index 007897a028..8d055eee0a 100644 --- a/py/bc.h +++ b/py/bc.h @@ -302,14 +302,14 @@ static inline void mp_module_context_alloc_tables(mp_module_context_t *context, #if MICROPY_EMIT_BYTECODE_USES_QSTR_TABLE size_t nq = (n_qstr * sizeof(qstr_short_t) + sizeof(mp_uint_t) - 1) / sizeof(mp_uint_t); size_t no = n_obj; - mp_uint_t *mem = m_new(mp_uint_t, nq + no); + mp_uint_t *mem = m_malloc_items(nq + no); context->constants.qstr_table = (qstr_short_t *)mem; context->constants.obj_table = (mp_obj_t *)(mem + nq); #else if (n_obj == 0) { context->constants.obj_table = NULL; } else { - context->constants.obj_table = m_new(mp_obj_t, n_obj); + context->constants.obj_table = m_malloc_items(n_obj); } #endif } diff --git a/py/circuitpy_mpconfig.h b/py/circuitpy_mpconfig.h index d7a230bf61..d6658867b7 100644 --- a/py/circuitpy_mpconfig.h +++ b/py/circuitpy_mpconfig.h @@ -62,6 +62,7 @@ extern void common_hal_mcu_enable_interrupts(void); #define MICROPY_EMIT_X64 (0) #define MICROPY_ENABLE_DOC_STRING (0) #define MICROPY_ENABLE_FINALISER (1) +#define MICROPY_ENABLE_SELECTIVE_COLLECT (1) #define MICROPY_ENABLE_GC (1) #define MICROPY_ENABLE_PYSTACK (1) #define MICROPY_TRACKED_ALLOC (CIRCUITPY_SSL_MBEDTLS) diff --git a/py/compile.c b/py/compile.c index 085c342605..394b2df54c 100644 --- a/py/compile.c +++ b/py/compile.c @@ -221,7 +221,8 @@ static void mp_emit_common_start_pass(mp_emit_common_t *emit, pass_kind_t pass) if (emit->ct_cur_child == 0) { emit->children = NULL; } else { - emit->children = m_new0(mp_raw_code_t *, emit->ct_cur_child); + // CIRCUITPY-CHANGE: Use m_malloc_helper with collect flag to support selective collection + emit->children = m_malloc_helper(sizeof(mp_raw_code_t *) * (emit->ct_cur_child), M_MALLOC_ENSURE_ZEROED | M_MALLOC_RAISE_ERROR | M_MALLOC_COLLECT); } } emit->ct_cur_child = 0; @@ -3688,7 +3689,7 @@ void mp_compile_to_raw_code(mp_parse_tree_t *parse_tree, qstr source_file, bool mp_obj_t mp_compile(mp_parse_tree_t 
*parse_tree, qstr source_file, bool is_repl) { mp_compiled_module_t cm; - cm.context = m_new_obj(mp_module_context_t); + cm.context = m_malloc_with_collect(sizeof(mp_module_context_t)); cm.context->module.globals = mp_globals_get(); mp_compile_to_raw_code(parse_tree, source_file, is_repl, &cm); // return function that executes the outer module diff --git a/py/emitbc.c b/py/emitbc.c index 05754cfabf..9f5389361b 100644 --- a/py/emitbc.c +++ b/py/emitbc.c @@ -76,7 +76,7 @@ struct _emit_t { }; emit_t *emit_bc_new(mp_emit_common_t *emit_common) { - emit_t *emit = m_new0(emit_t, 1); + emit_t *emit = m_new_struct_with_collect(emit_t, 1); emit->emit_common = emit_common; return emit; } diff --git a/py/emitglue.c b/py/emitglue.c index 8ab624ee7c..d7b7e113eb 100644 --- a/py/emitglue.c +++ b/py/emitglue.c @@ -54,7 +54,8 @@ mp_uint_t mp_verbose_flag = 0; #endif mp_raw_code_t *mp_emit_glue_new_raw_code(void) { - mp_raw_code_t *rc = m_new0(mp_raw_code_t, 1); + // CIRCUITPY-CHANGE: Use m_malloc_helper with collect flag because raw code children are allocations too. 
+ mp_raw_code_t *rc = m_malloc_helper(sizeof(mp_raw_code_t), M_MALLOC_ENSURE_ZEROED | M_MALLOC_RAISE_ERROR | M_MALLOC_COLLECT); rc->kind = MP_CODE_RESERVED; #if MICROPY_PY_SYS_SETTRACE rc->line_of_definition = 0; diff --git a/py/gc.c b/py/gc.c index 5375218f3e..20fbb28555 100644 --- a/py/gc.c +++ b/py/gc.c @@ -39,6 +39,8 @@ // CIRCUITPY-CHANGE #include "supervisor/shared/safe_mode.h" +#include "supervisor/shared/serial.h" + #if CIRCUITPY_MEMORYMONITOR #include "shared-module/memorymonitor/__init__.h" #endif @@ -123,6 +125,16 @@ #define FTB_CLEAR(area, block) do { area->gc_finaliser_table_start[(block) / BLOCKS_PER_FTB] &= (~(1 << ((block) & 7))); } while (0) #endif +// CIRCUITPY-CHANGE: Add selective collect table to skip scanning large buffers without pointers +// CTB = collect table byte +// if set, then the corresponding block should be collected during GC + +#define BLOCKS_PER_CTB (8) + +#define CTB_GET(area, block) ((area->gc_collect_table_start[(block) / BLOCKS_PER_CTB] >> ((block) & 7)) & 1) +#define CTB_SET(area, block) do { area->gc_collect_table_start[(block) / BLOCKS_PER_CTB] |= (1 << ((block) & 7)); } while (0) +#define CTB_CLEAR(area, block) do { area->gc_collect_table_start[(block) / BLOCKS_PER_CTB] &= (~(1 << ((block) & 7))); } while (0) + #if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL #define GC_ENTER() mp_thread_mutex_lock(&MP_STATE_MEM(gc_mutex), 1) #define GC_EXIT() mp_thread_mutex_unlock(&MP_STATE_MEM(gc_mutex)) @@ -143,48 +155,66 @@ void __attribute__ ((noinline)) gc_log_change(uint32_t start_block, uint32_t len #pragma GCC pop_options #endif + // TODO waste less memory; currently requires that all entries in alloc_table have a corresponding block in pool static void gc_setup_area(mp_state_mem_area_t *area, void *start, void *end) { - // calculate parameters for GC (T=total, A=alloc table, F=finaliser table, P=pool; all in bytes): - // T = A + F + P + // CIRCUITPY-CHANGE: Updated calculation to include selective collect table + // calculate 
parameters for GC (T=total, A=alloc table, F=finaliser table, C=collect table, P=pool; all in bytes): + // T = A + F + C + P // F = A * BLOCKS_PER_ATB / BLOCKS_PER_FTB + // C = A * BLOCKS_PER_ATB / BLOCKS_PER_CTB // P = A * BLOCKS_PER_ATB * BYTES_PER_BLOCK - // => T = A * (1 + BLOCKS_PER_ATB / BLOCKS_PER_FTB + BLOCKS_PER_ATB * BYTES_PER_BLOCK) + size_t total_byte_len = (byte *)end - (byte *)start; + + // Calculate the denominator for the alloc table size calculation + size_t bits_per_block = MP_BITS_PER_BYTE / BLOCKS_PER_ATB; // Start with bits for ATB + #if MICROPY_ENABLE_FINALISER - area->gc_alloc_table_byte_len = (total_byte_len - ALLOC_TABLE_GAP_BYTE) - * MP_BITS_PER_BYTE - / ( - MP_BITS_PER_BYTE - + MP_BITS_PER_BYTE * BLOCKS_PER_ATB / BLOCKS_PER_FTB - + MP_BITS_PER_BYTE * BLOCKS_PER_ATB * BYTES_PER_BLOCK - ); - #else - area->gc_alloc_table_byte_len = (total_byte_len - ALLOC_TABLE_GAP_BYTE) / (1 + MP_BITS_PER_BYTE / 2 * BYTES_PER_BLOCK); + bits_per_block += MP_BITS_PER_BYTE / BLOCKS_PER_FTB; // Add bits for FTB #endif + #if MICROPY_ENABLE_SELECTIVE_COLLECT + bits_per_block += MP_BITS_PER_BYTE / BLOCKS_PER_CTB; // Add bits for CTB + #endif + + bits_per_block += MP_BITS_PER_BYTE * BYTES_PER_BLOCK; // Add bits for the block itself + + // Calculate the allocation table size + size_t available_bits = (total_byte_len - ALLOC_TABLE_GAP_BYTE) * MP_BITS_PER_BYTE; + size_t blocks = available_bits / bits_per_block; + area->gc_alloc_table_byte_len = blocks / BLOCKS_PER_ATB; + + // Set up all the table pointers area->gc_alloc_table_start = (byte *)start; + byte *next_table = area->gc_alloc_table_start + area->gc_alloc_table_byte_len + ALLOC_TABLE_GAP_BYTE; + // Total number of blocks in the pool + size_t gc_pool_block_len = area->gc_alloc_table_byte_len * BLOCKS_PER_ATB; + + // Calculate table sizes and set start pointers #if MICROPY_ENABLE_FINALISER - size_t gc_finaliser_table_byte_len = (area->gc_alloc_table_byte_len * BLOCKS_PER_ATB + BLOCKS_PER_FTB - 1) / 
BLOCKS_PER_FTB; - area->gc_finaliser_table_start = area->gc_alloc_table_start + area->gc_alloc_table_byte_len + ALLOC_TABLE_GAP_BYTE; + size_t gc_finaliser_table_byte_len = (gc_pool_block_len + BLOCKS_PER_FTB - 1) / BLOCKS_PER_FTB; + area->gc_finaliser_table_start = next_table; + next_table += gc_finaliser_table_byte_len; #endif - size_t gc_pool_block_len = area->gc_alloc_table_byte_len * BLOCKS_PER_ATB; + #if MICROPY_ENABLE_SELECTIVE_COLLECT + size_t gc_collect_table_byte_len = (gc_pool_block_len + BLOCKS_PER_CTB - 1) / BLOCKS_PER_CTB; + area->gc_collect_table_start = next_table; + next_table += gc_collect_table_byte_len; + #endif + + // Set pool pointers area->gc_pool_start = (byte *)end - gc_pool_block_len * BYTES_PER_BLOCK; area->gc_pool_end = end; - #if MICROPY_ENABLE_FINALISER - assert(area->gc_pool_start >= area->gc_finaliser_table_start + gc_finaliser_table_byte_len); - #endif + // Verify enough space between last table and start of pool + assert(area->gc_pool_start >= next_table); - #if MICROPY_ENABLE_FINALISER - // clear ATB's and FTB's - memset(area->gc_alloc_table_start, 0, gc_finaliser_table_byte_len + area->gc_alloc_table_byte_len + ALLOC_TABLE_GAP_BYTE); - #else - // clear ATB's - memset(area->gc_alloc_table_start, 0, area->gc_alloc_table_byte_len + ALLOC_TABLE_GAP_BYTE); - #endif + // Clear all tables + size_t tables_size = next_table - area->gc_alloc_table_start; + memset(area->gc_alloc_table_start, 0, tables_size); area->gc_last_free_atb_index = 0; area->gc_last_used_block = 0; @@ -204,6 +234,12 @@ static void gc_setup_area(mp_state_mem_area_t *area, void *start, void *end) { gc_finaliser_table_byte_len, gc_finaliser_table_byte_len * BLOCKS_PER_FTB); #endif + #if MICROPY_ENABLE_SELECTIVE_COLLECT + DEBUG_printf(" collect table at %p, length " UINT_FMT " bytes, " + UINT_FMT " blocks\n", area->gc_collect_table_start, + gc_collect_table_byte_len, + gc_collect_table_byte_len * BLOCKS_PER_CTB); + #endif DEBUG_printf(" pool at %p, length " UINT_FMT " 
bytes, " UINT_FMT " blocks\n", area->gc_pool_start, gc_pool_block_len * BYTES_PER_BLOCK, gc_pool_block_len); @@ -261,16 +297,42 @@ void gc_add(void *start, void *end) { } #if MICROPY_GC_SPLIT_HEAP_AUTO +// CIRCUITPY-CHANGE: Added function to compute heap size with selective collect table +static size_t compute_heap_size(size_t total_blocks) { + // Add two blocks to account for allocation alignment. + total_blocks += 2; + size_t atb_bytes = (total_blocks + BLOCKS_PER_ATB - 1) / BLOCKS_PER_ATB; + size_t ftb_bytes = 0; + size_t ctb_bytes = 0; + #if MICROPY_ENABLE_FINALISER + ftb_bytes = (total_blocks + BLOCKS_PER_FTB - 1) / BLOCKS_PER_FTB; + #endif + #if MICROPY_ENABLE_SELECTIVE_COLLECT + ctb_bytes = (total_blocks + BLOCKS_PER_CTB - 1) / BLOCKS_PER_CTB; + #endif + size_t pool_bytes = total_blocks * BYTES_PER_BLOCK; + + // Compute bytes needed to build a heap with total_blocks blocks. + size_t total_heap = + atb_bytes + + ftb_bytes + + ctb_bytes + + pool_bytes + + ALLOC_TABLE_GAP_BYTE + + sizeof(mp_state_mem_area_t); + + // Round up size to the nearest multiple of BYTES_PER_BLOCK. + total_heap = (total_heap + BYTES_PER_BLOCK - 1) / BYTES_PER_BLOCK; + total_heap *= BYTES_PER_BLOCK; + return total_heap; +} + // Try to automatically add a heap area large enough to fulfill 'failed_alloc'. static bool gc_try_add_heap(size_t failed_alloc) { // 'needed' is the size of a heap large enough to hold failed_alloc, with // the additional metadata overheads as calculated in gc_setup_area(). - // - // Rather than reproduce all of that logic here, we approximate that adding - // (13/512) is enough overhead for sufficiently large heap areas (the - // overhead converges to 3/128, but there's some fixed overhead and some - // rounding up of partial block sizes). 
- size_t needed = failed_alloc + MAX(2048, failed_alloc * 13 / 512); + size_t total_new_blocks = (failed_alloc + BYTES_PER_BLOCK - 1) / BYTES_PER_BLOCK; + size_t needed = compute_heap_size(total_new_blocks); size_t avail = gc_get_max_new_split(); @@ -314,18 +376,7 @@ static bool gc_try_add_heap(size_t failed_alloc) { total_blocks += area->gc_alloc_table_byte_len * BLOCKS_PER_ATB; } - // Compute bytes needed to build a heap with total_blocks blocks. - size_t total_heap = - total_blocks / BLOCKS_PER_ATB - #if MICROPY_ENABLE_FINALISER - + total_blocks / BLOCKS_PER_FTB - #endif - + total_blocks * BYTES_PER_BLOCK - + ALLOC_TABLE_GAP_BYTE - + sizeof(mp_state_mem_area_t); - - // Round up size to the nearest multiple of BYTES_PER_BLOCK. - total_heap = (total_heap + BYTES_PER_BLOCK - 1) & (~(BYTES_PER_BLOCK - 1)); + size_t total_heap = compute_heap_size(total_blocks); DEBUG_printf("total_heap " UINT_FMT " bytes\n", total_heap); @@ -447,41 +498,51 @@ static void MP_NO_INSTRUMENT PLACE_IN_ITCM(gc_mark_subtree)(size_t block) // check that the consecutive blocks didn't overflow past the end of the area assert(area->gc_pool_start + (block + n_blocks) * BYTES_PER_BLOCK <= area->gc_pool_end); - // check this block's children - void **ptrs = (void **)PTR_FROM_BLOCK(area, block); - for (size_t i = n_blocks * BYTES_PER_BLOCK / sizeof(void *); i > 0; i--, ptrs++) { - MICROPY_GC_HOOK_LOOP(i); - void *ptr = *ptrs; - // If this is a heap pointer that hasn't been marked, mark it and push - // it's children to the stack. - #if MICROPY_GC_SPLIT_HEAP - mp_state_mem_area_t *ptr_area = gc_get_ptr_area(ptr); - if (!ptr_area) { - // Not a heap-allocated pointer (might even be random data). - continue; - } - #else - if (!VERIFY_PTR(ptr)) { - continue; - } - mp_state_mem_area_t *ptr_area = area; - #endif - size_t ptr_block = BLOCK_FROM_PTR(ptr_area, ptr); - if (ATB_GET_KIND(ptr_area, ptr_block) != AT_HEAD) { - // This block is already marked. - continue; - } - // An unmarked head. 
Mark it, and push it on gc stack. - TRACE_MARK(ptr_block, ptr); - ATB_HEAD_TO_MARK(ptr_area, ptr_block); - if (sp < MICROPY_ALLOC_GC_STACK_SIZE) { - MP_STATE_MEM(gc_block_stack)[sp] = ptr_block; + // check if this block should be collected + #if MICROPY_ENABLE_SELECTIVE_COLLECT + bool should_scan = CTB_GET(area, block); + #else + bool should_scan = true; + #endif + + // Only scan the block's children if it's not a leaf + if (should_scan) { + // check this block's children + void **ptrs = (void **)PTR_FROM_BLOCK(area, block); + for (size_t i = n_blocks * BYTES_PER_BLOCK / sizeof(void *); i > 0; i--, ptrs++) { + MICROPY_GC_HOOK_LOOP(i); + void *ptr = *ptrs; + // If this is a heap pointer that hasn't been marked, mark it and push + // it's children to the stack. #if MICROPY_GC_SPLIT_HEAP - MP_STATE_MEM(gc_area_stack)[sp] = ptr_area; + mp_state_mem_area_t *ptr_area = gc_get_ptr_area(ptr); + if (!ptr_area) { + // Not a heap-allocated pointer (might even be random data). + continue; + } + #else + if (!VERIFY_PTR(ptr)) { + continue; + } + mp_state_mem_area_t *ptr_area = area; #endif - sp += 1; - } else { - MP_STATE_MEM(gc_stack_overflow) = 1; + size_t ptr_block = BLOCK_FROM_PTR(ptr_area, ptr); + if (ATB_GET_KIND(ptr_area, ptr_block) != AT_HEAD) { + // This block is already marked. + continue; + } + // An unmarked head. Mark it, and push it on gc stack. 
+ TRACE_MARK(ptr_block, ptr); + ATB_HEAD_TO_MARK(ptr_area, ptr_block); + if (sp < MICROPY_ALLOC_GC_STACK_SIZE) { + MP_STATE_MEM(gc_block_stack)[sp] = ptr_block; + #if MICROPY_GC_SPLIT_HEAP + MP_STATE_MEM(gc_area_stack)[sp] = ptr_area; + #endif + sp += 1; + } else { + MP_STATE_MEM(gc_stack_overflow) = 1; + } } } @@ -944,6 +1005,19 @@ found: (void)has_finaliser; #endif + #if MICROPY_ENABLE_SELECTIVE_COLLECT + bool do_not_collect = (alloc_flags & GC_ALLOC_FLAG_DO_NOT_COLLECT) != 0; + GC_ENTER(); + if (do_not_collect) { + // Mark as not to be collected + CTB_CLEAR(area, start_block); + } else { + // By default, all blocks should be collected + CTB_SET(area, start_block); + } + GC_EXIT(); + #endif + #if EXTENSIVE_HEAP_PROFILING gc_dump_alloc_table(&mp_plat_print); #endif @@ -1110,7 +1184,7 @@ void *gc_realloc(void *ptr, mp_uint_t n_bytes) { void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) { // check for pure allocation if (ptr_in == NULL) { - return gc_alloc(n_bytes, false); + return gc_alloc(n_bytes, 0); } // check for pure free @@ -1248,10 +1322,17 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) { return ptr_in; } + uint8_t alloc_flags = 0; #if MICROPY_ENABLE_FINALISER - bool ftb_state = FTB_GET(area, block); - #else - bool ftb_state = false; + if (FTB_GET(area, block)) { + alloc_flags |= GC_ALLOC_FLAG_HAS_FINALISER; + } + #endif + + #if MICROPY_ENABLE_SELECTIVE_COLLECT + if (!CTB_GET(area, block)) { + alloc_flags |= GC_ALLOC_FLAG_DO_NOT_COLLECT; + } #endif GC_EXIT(); @@ -1262,7 +1343,7 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) { } // can't resize inplace; try to find a new contiguous chain - void *ptr_out = gc_alloc(n_bytes, ftb_state); + void *ptr_out = gc_alloc(n_bytes, alloc_flags); // check that the alloc succeeded if (ptr_out == NULL) { diff --git a/py/gc.h b/py/gc.h index 5f4b18f7e6..6e16f68a06 100644 --- a/py/gc.h +++ b/py/gc.h @@ -73,6 +73,9 @@ void gc_sweep_all(void); enum { 
GC_ALLOC_FLAG_HAS_FINALISER = 1, + #if MICROPY_ENABLE_SELECTIVE_COLLECT + GC_ALLOC_FLAG_DO_NOT_COLLECT = 2, + #endif }; void *gc_alloc(size_t n_bytes, unsigned int alloc_flags); diff --git a/py/lexer.c b/py/lexer.c index bff8e63765..2f8fa71cfd 100644 --- a/py/lexer.c +++ b/py/lexer.c @@ -840,7 +840,7 @@ void mp_lexer_to_next(mp_lexer_t *lex) { } mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) { - mp_lexer_t *lex = m_new_obj(mp_lexer_t); + mp_lexer_t *lex = m_new_struct_with_collect(mp_lexer_t, 1); lex->source_name = src_name; lex->reader = reader; diff --git a/py/malloc.c b/py/malloc.c index 60f0df7455..84bd015281 100644 --- a/py/malloc.c +++ b/py/malloc.c @@ -24,6 +24,7 @@ * THE SOFTWARE. */ +#include #include #include #include @@ -53,11 +54,19 @@ // freely accessed - for interfacing with system and 3rd-party libs for // example. On the other hand, some (e.g. bare-metal) ports may use GC // heap as system heap, so, to avoid warnings, we do undef's first. +// CIRCUITPY-CHANGE: Add selective collect support to malloc to optimize GC for large buffers #undef malloc #undef free #undef realloc -#define malloc(b) gc_alloc((b), false) -#define malloc_with_finaliser(b) gc_alloc((b), true) +#if MICROPY_ENABLE_SELECTIVE_COLLECT +#define malloc(b) gc_alloc((b), GC_ALLOC_FLAG_DO_NOT_COLLECT) +#define malloc_with_collect(b) gc_alloc((b), 0) +#define malloc_without_collect(b) gc_alloc((b), GC_ALLOC_FLAG_DO_NOT_COLLECT) +#else +#define malloc(b) gc_alloc((b), 0) +#define malloc_with_collect(b) gc_alloc((b), 0) +#endif +#define malloc_with_finaliser(b) gc_alloc((b), GC_ALLOC_FLAG_HAS_FINALISER) #define free gc_free #define realloc(ptr, n) gc_realloc(ptr, n, true) #define realloc_ext(ptr, n, mv) gc_realloc(ptr, n, mv) @@ -69,6 +78,10 @@ #error MICROPY_ENABLE_FINALISER requires MICROPY_ENABLE_GC #endif +#if MICROPY_ENABLE_SELECTIVE_COLLECT +#error MICROPY_ENABLE_SELECTIVE_COLLECT requires MICROPY_ENABLE_GC +#endif + static void *realloc_ext(void *ptr, size_t n_bytes, 
bool allow_move) { if (allow_move) { return realloc(ptr, n_bytes); @@ -82,9 +95,23 @@ static void *realloc_ext(void *ptr, size_t n_bytes, bool allow_move) { #endif // MICROPY_ENABLE_GC -void *m_malloc(size_t num_bytes) { - void *ptr = malloc(num_bytes); - if (ptr == NULL && num_bytes != 0) { +// CIRCUITPY-CHANGE: Add malloc helper with flags instead of a list of bools. +void *m_malloc_helper(size_t num_bytes, uint8_t flags) { + void *ptr; + #if MICROPY_ENABLE_GC + #if MICROPY_ENABLE_SELECTIVE_COLLECT + if ((flags & M_MALLOC_COLLECT) == 0) { + ptr = malloc_without_collect(num_bytes); + } else { + ptr = malloc_with_collect(num_bytes); + } + #else + ptr = malloc_with_collect(num_bytes); + #endif + #else + ptr = malloc(num_bytes); + #endif + if (ptr == NULL && num_bytes != 0 && (flags & M_MALLOC_RAISE_ERROR)) { m_malloc_fail(num_bytes); } #if MICROPY_MEM_STATS @@ -92,44 +119,34 @@ void *m_malloc(size_t num_bytes) { MP_STATE_MEM(current_bytes_allocated) += num_bytes; UPDATE_PEAK(); #endif + // If this config is set then the GC clears all memory, so we don't need to. 
+ #if !MICROPY_GC_CONSERVATIVE_CLEAR + if (flags & M_MALLOC_ENSURE_ZEROED) { + memset(ptr, 0, num_bytes); + } + #endif DEBUG_printf("malloc %d : %p\n", num_bytes, ptr); return ptr; } +void *m_malloc(size_t num_bytes) { + return m_malloc_helper(num_bytes, M_MALLOC_RAISE_ERROR); +} + void *m_malloc_maybe(size_t num_bytes) { - void *ptr = malloc(num_bytes); - #if MICROPY_MEM_STATS - MP_STATE_MEM(total_bytes_allocated) += num_bytes; - MP_STATE_MEM(current_bytes_allocated) += num_bytes; - UPDATE_PEAK(); - #endif - DEBUG_printf("malloc %d : %p\n", num_bytes, ptr); - return ptr; + return m_malloc_helper(num_bytes, 0); } -#if MICROPY_ENABLE_FINALISER -void *m_malloc_with_finaliser(size_t num_bytes) { - void *ptr = malloc_with_finaliser(num_bytes); - if (ptr == NULL && num_bytes != 0) { - m_malloc_fail(num_bytes); - } - #if MICROPY_MEM_STATS - MP_STATE_MEM(total_bytes_allocated) += num_bytes; - MP_STATE_MEM(current_bytes_allocated) += num_bytes; - UPDATE_PEAK(); - #endif - DEBUG_printf("malloc %d : %p\n", num_bytes, ptr); - return ptr; -} -#endif - void *m_malloc0(size_t num_bytes) { - void *ptr = m_malloc(num_bytes); - // If this config is set then the GC clears all memory, so we don't need to. 
- #if !MICROPY_GC_CONSERVATIVE_CLEAR - memset(ptr, 0, num_bytes); - #endif - return ptr; + return m_malloc_helper(num_bytes, M_MALLOC_ENSURE_ZEROED | M_MALLOC_RAISE_ERROR); +} + +void *m_malloc_with_collect(size_t num_bytes) { + return m_malloc_helper(num_bytes, M_MALLOC_RAISE_ERROR | M_MALLOC_COLLECT); +} + +void *m_malloc_maybe_with_collect(size_t num_bytes) { + return m_malloc_helper(num_bytes, M_MALLOC_COLLECT); } #if MICROPY_MALLOC_USES_ALLOCATED_SIZE diff --git a/py/map.c b/py/map.c index d40e3dc4d0..ce2d305a49 100644 --- a/py/map.c +++ b/py/map.c @@ -86,13 +86,16 @@ static size_t get_hash_alloc_greater_or_equal_to(size_t x) { /******************************************************************************/ /* map */ +// CIRCUITPY-CHANGE: Helper for allocating tables of elements +#define malloc_table(num) m_malloc_helper(sizeof(mp_map_elem_t) * (num), M_MALLOC_COLLECT | M_MALLOC_RAISE_ERROR | M_MALLOC_ENSURE_ZEROED) + void mp_map_init(mp_map_t *map, size_t n) { if (n == 0) { map->alloc = 0; map->table = NULL; } else { map->alloc = n; - map->table = m_new0(mp_map_elem_t, map->alloc); + map->table = malloc_table(map->alloc); } map->used = 0; map->all_keys_are_qstrs = 1; @@ -133,7 +136,7 @@ static void mp_map_rehash(mp_map_t *map) { size_t new_alloc = get_hash_alloc_greater_or_equal_to(map->alloc + 1); DEBUG_printf("mp_map_rehash(%p): " UINT_FMT " -> " UINT_FMT "\n", map, old_alloc, new_alloc); mp_map_elem_t *old_table = map->table; - mp_map_elem_t *new_table = m_new0(mp_map_elem_t, new_alloc); + mp_map_elem_t *new_table = malloc_table(new_alloc); // If we reach this point, table resizing succeeded, now we can edit the old map. 
map->alloc = new_alloc; map->used = 0; @@ -329,7 +332,7 @@ mp_map_elem_t *MICROPY_WRAP_MP_MAP_LOOKUP(mp_map_lookup)(mp_map_t * map, mp_obj_ void mp_set_init(mp_set_t *set, size_t n) { set->alloc = n; set->used = 0; - set->table = m_new0(mp_obj_t, set->alloc); + set->table = m_malloc_items0(set->alloc); } static void mp_set_rehash(mp_set_t *set) { @@ -337,7 +340,7 @@ static void mp_set_rehash(mp_set_t *set) { mp_obj_t *old_table = set->table; set->alloc = get_hash_alloc_greater_or_equal_to(set->alloc + 1); set->used = 0; - set->table = m_new0(mp_obj_t, set->alloc); + set->table = m_malloc_items0(set->alloc); for (size_t i = 0; i < old_alloc; i++) { if (old_table[i] != MP_OBJ_NULL && old_table[i] != MP_OBJ_SENTINEL) { mp_set_lookup(set, old_table[i], MP_MAP_LOOKUP_ADD_IF_NOT_FOUND); diff --git a/py/misc.h b/py/misc.h index 769d12ddbe..c8b2347c3d 100644 --- a/py/misc.h +++ b/py/misc.h @@ -74,14 +74,21 @@ typedef unsigned int uint; // TODO make a lazy m_renew that can increase by a smaller amount than requested (but by at least 1 more element) -#define m_new(type, num) ((type *)(m_malloc(sizeof(type) * (num)))) -#define m_new_maybe(type, num) ((type *)(m_malloc_maybe(sizeof(type) * (num)))) -#define m_new0(type, num) ((type *)(m_malloc0(sizeof(type) * (num)))) +// The following are convenience wrappers for m_malloc_helper and can save space at the call sites. +// m_malloc and m_new allocate space that is not collected and does not have a finaliser. +// Use m_malloc_items() to allocate space for mp_obj_t that will be collected. +// Use mp_obj_malloc*() to allocate space for objects (aka structs with a type pointer) that will be +// collected. 
+ +#define m_new(type, num) ((type *)(m_malloc_helper(sizeof(type) * (num), M_MALLOC_RAISE_ERROR))) +#define m_new_struct_with_collect(type, num) ((type *)(m_malloc_helper(sizeof(type) * (num), M_MALLOC_RAISE_ERROR | M_MALLOC_COLLECT))) +#define m_new_maybe(type, num) ((type *)(m_malloc_helper(sizeof(type) * (num), 0))) +#define m_new0(type, num) ((type *)(m_malloc_helper(sizeof(type) * (num), M_MALLOC_ENSURE_ZEROED | M_MALLOC_RAISE_ERROR))) #define m_new_obj(type) (m_new(type, 1)) #define m_new_obj_maybe(type) (m_new_maybe(type, 1)) -#define m_new_obj_var(obj_type, var_field, var_type, var_num) ((obj_type *)m_malloc(offsetof(obj_type, var_field) + sizeof(var_type) * (var_num))) -#define m_new_obj_var0(obj_type, var_field, var_type, var_num) ((obj_type *)m_malloc0(offsetof(obj_type, var_field) + sizeof(var_type) * (var_num))) -#define m_new_obj_var_maybe(obj_type, var_field, var_type, var_num) ((obj_type *)m_malloc_maybe(offsetof(obj_type, var_field) + sizeof(var_type) * (var_num))) +#define m_new_obj_var(obj_type, var_field, var_type, var_num) ((obj_type *)m_malloc_helper(offsetof(obj_type, var_field) + sizeof(var_type) * (var_num), M_MALLOC_RAISE_ERROR | M_MALLOC_COLLECT)) +#define m_new_obj_var0(obj_type, var_field, var_type, var_num) ((obj_type *)m_malloc_helper(offsetof(obj_type, var_field) + sizeof(var_type) * (var_num), M_MALLOC_ENSURE_ZEROED | M_MALLOC_RAISE_ERROR | M_MALLOC_COLLECT)) +#define m_new_obj_var_maybe(obj_type, var_field, var_type, var_num) ((obj_type *)m_malloc_helper(offsetof(obj_type, var_field) + sizeof(var_type) * (var_num), M_MALLOC_COLLECT)) #if MICROPY_MALLOC_USES_ALLOCATED_SIZE #define m_renew(type, ptr, old_num, new_num) ((type *)(m_realloc((ptr), sizeof(type) * (old_num), sizeof(type) * (new_num)))) #define m_renew_maybe(type, ptr, old_num, new_num, allow_move) ((type *)(m_realloc_maybe((ptr), sizeof(type) * (old_num), sizeof(type) * (new_num), (allow_move)))) @@ -95,10 +102,21 @@ typedef unsigned int uint; #endif #define 
m_del_obj(type, ptr) (m_del(type, ptr, 1)) +#define m_malloc_items(num) m_malloc_helper(sizeof(mp_obj_t) * (num), M_MALLOC_RAISE_ERROR | M_MALLOC_COLLECT) +#define m_malloc_items0(num) m_malloc_helper(sizeof(mp_obj_t) * (num), M_MALLOC_ENSURE_ZEROED | M_MALLOC_RAISE_ERROR | M_MALLOC_COLLECT) + +// Flags for m_malloc_helper +#define M_MALLOC_ENSURE_ZEROED (1 << 0) +#define M_MALLOC_RAISE_ERROR (1 << 1) +#define M_MALLOC_COLLECT (1 << 2) +#define M_MALLOC_WITH_FINALISER (1 << 3) + +void *m_malloc_helper(size_t num_bytes, uint8_t flags); void *m_malloc(size_t num_bytes); void *m_malloc_maybe(size_t num_bytes); -void *m_malloc_with_finaliser(size_t num_bytes); void *m_malloc0(size_t num_bytes); +void *m_malloc_with_collect(size_t num_bytes); +void *m_malloc_maybe_with_collect(size_t num_bytes); #if MICROPY_MALLOC_USES_ALLOCATED_SIZE void *m_realloc(void *ptr, size_t old_num_bytes, size_t new_num_bytes); void *m_realloc_maybe(void *ptr, size_t old_num_bytes, size_t new_num_bytes, bool allow_move); diff --git a/py/mpstate.h b/py/mpstate.h index 7308e57b58..229a5c4736 100644 --- a/py/mpstate.h +++ b/py/mpstate.h @@ -100,6 +100,9 @@ typedef struct _mp_state_mem_area_t { #if MICROPY_ENABLE_FINALISER byte *gc_finaliser_table_start; #endif + #if MICROPY_ENABLE_SELECTIVE_COLLECT + byte *gc_collect_table_start; + #endif byte *gc_pool_start; byte *gc_pool_end; diff --git a/py/obj.c b/py/obj.c index a825efc3c5..d3f871ece9 100644 --- a/py/obj.c +++ b/py/obj.c @@ -31,6 +31,7 @@ // CIRCUITPY-CHANGE #include "shared/runtime/interrupt_char.h" +#include "py/misc.h" #include "py/obj.h" #include "py/objtype.h" #include "py/objint.h" @@ -46,7 +47,7 @@ // Allocates an object and also sets type, for mp_obj_malloc{,_var} macros. 
MP_NOINLINE void *mp_obj_malloc_helper(size_t num_bytes, const mp_obj_type_t *type) { - mp_obj_base_t *base = (mp_obj_base_t *)m_malloc(num_bytes); + mp_obj_base_t *base = (mp_obj_base_t *)m_malloc_helper(num_bytes, M_MALLOC_RAISE_ERROR | M_MALLOC_COLLECT); base->type = type; return base; } @@ -54,7 +55,7 @@ MP_NOINLINE void *mp_obj_malloc_helper(size_t num_bytes, const mp_obj_type_t *ty #if MICROPY_ENABLE_FINALISER // Allocates an object and also sets type, for mp_obj_malloc{,_var}_with_finaliser macros. MP_NOINLINE void *mp_obj_malloc_with_finaliser_helper(size_t num_bytes, const mp_obj_type_t *type) { - mp_obj_base_t *base = (mp_obj_base_t *)m_malloc_with_finaliser(num_bytes); + mp_obj_base_t *base = (mp_obj_base_t *)m_malloc_helper(num_bytes, M_MALLOC_RAISE_ERROR | M_MALLOC_COLLECT | M_MALLOC_WITH_FINALISER); base->type = type; return base; } diff --git a/py/objarray.c b/py/objarray.c index 3ab982184f..c7a8753415 100644 --- a/py/objarray.c +++ b/py/objarray.c @@ -105,18 +105,20 @@ static mp_obj_array_t *array_new(char typecode, size_t n) { mp_raise_ValueError(MP_ERROR_TEXT("bad typecode")); } int typecode_size = mp_binary_get_size('@', typecode, NULL); - mp_obj_array_t *o = m_new_obj(mp_obj_array_t); + + const mp_obj_type_t *type; #if MICROPY_PY_BUILTINS_BYTEARRAY && MICROPY_PY_ARRAY - o->base.type = (typecode == BYTEARRAY_TYPECODE) ? &mp_type_bytearray : &mp_type_array; + type = (typecode == BYTEARRAY_TYPECODE) ? 
&mp_type_bytearray : &mp_type_array; #elif MICROPY_PY_BUILTINS_BYTEARRAY - o->base.type = &mp_type_bytearray; + type = &mp_type_bytearray; #else - o->base.type = &mp_type_array; + type = &mp_type_array; #endif + mp_obj_array_t *o = mp_obj_malloc(mp_obj_array_t, type); o->typecode = typecode; o->free = 0; o->len = n; - o->items = m_new(byte, typecode_size * o->len); + o->items = m_malloc(typecode_size * o->len); return o; } #endif @@ -225,7 +227,7 @@ static mp_obj_t bytearray_make_new(const mp_obj_type_t *type_in, size_t n_args, #if MICROPY_PY_BUILTINS_MEMORYVIEW mp_obj_t mp_obj_new_memoryview(byte typecode, size_t nitems, void *items) { - mp_obj_array_t *self = m_new_obj(mp_obj_array_t); + mp_obj_array_t *self = mp_obj_malloc(mp_obj_array_t, &mp_type_memoryview); mp_obj_memoryview_init(self, typecode, 0, nitems, items); return MP_OBJ_FROM_PTR(self); } @@ -684,7 +686,7 @@ static mp_obj_t array_subscr(mp_obj_t self_in, mp_obj_t index_in, mp_obj_t value if (slice.start > memview_offset_max) { mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("memoryview offset too large")); } - res = m_new_obj(mp_obj_array_t); + res = mp_obj_malloc(mp_obj_array_t, &mp_type_memoryview); *res = *o; res->memview_offset += slice.start; res->len = slice.stop - slice.start; diff --git a/py/objclosure.c b/py/objclosure.c index 3ba507b959..04a3aa53a0 100644 --- a/py/objclosure.c +++ b/py/objclosure.c @@ -50,7 +50,7 @@ static mp_obj_t closure_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const return mp_call_function_n_kw(self->fun, self->n_closed + n_args, n_kw, args2); } else { // use heap to allocate temporary args array - mp_obj_t *args2 = m_new(mp_obj_t, n_total); + mp_obj_t *args2 = m_malloc_items(n_total); memcpy(args2, self->closed, self->n_closed * sizeof(mp_obj_t)); memcpy(args2 + self->n_closed, args, (n_args + 2 * n_kw) * sizeof(mp_obj_t)); mp_obj_t res = mp_call_function_n_kw(self->fun, self->n_closed + n_args, n_kw, args2); diff --git a/py/objdeque.c b/py/objdeque.c index 
583537017f..3a1db4946d 100644 --- a/py/objdeque.c +++ b/py/objdeque.c @@ -58,7 +58,7 @@ static mp_obj_t deque_make_new(const mp_obj_type_t *type, size_t n_args, size_t mp_obj_deque_t *o = mp_obj_malloc(mp_obj_deque_t, type); o->alloc = maxlen + 1; o->i_get = o->i_put = 0; - o->items = m_new0(mp_obj_t, o->alloc); + o->items = m_malloc_items0(o->alloc); if (n_args > 2) { o->flags = mp_obj_get_int(args[2]); diff --git a/py/objdict.c b/py/objdict.c index 7094a1c1f9..79a606f097 100644 --- a/py/objdict.c +++ b/py/objdict.c @@ -747,7 +747,8 @@ void mp_obj_dict_init(mp_obj_dict_t *dict, size_t n_args) { } mp_obj_t mp_obj_new_dict(size_t n_args) { - mp_obj_dict_t *o = m_new_obj(mp_obj_dict_t); + // CIRCUITPY-CHANGE: Use mp_obj_malloc because it is a Python object + mp_obj_dict_t *o = mp_obj_malloc(mp_obj_dict_t, &mp_type_dict); mp_obj_dict_init(o, n_args); return MP_OBJ_FROM_PTR(o); } diff --git a/py/objexcept.c b/py/objexcept.c index 70fdc15df4..2c045d721f 100644 --- a/py/objexcept.c +++ b/py/objexcept.c @@ -94,7 +94,7 @@ mp_obj_t mp_alloc_emergency_exception_buf(mp_obj_t size_in) { mp_int_t size = mp_obj_get_int(size_in); void *buf = NULL; if (size > 0) { - buf = m_new(byte, size); + buf = m_malloc_with_collect(size); } int old_size = mp_emergency_exception_buf_size; @@ -220,7 +220,7 @@ mp_obj_t mp_obj_exception_make_new(const mp_obj_type_t *type, size_t n_args, siz mp_arg_check_num(n_args, n_kw, 0, MP_OBJ_FUN_ARGS_MAX, false); // Try to allocate memory for the exception, with fallback to emergency exception object - mp_obj_exception_t *o_exc = m_new_obj_maybe(mp_obj_exception_t); + mp_obj_exception_t *o_exc = m_malloc_maybe_with_collect(sizeof(mp_obj_exception_t)); if (o_exc == NULL) { o_exc = &MP_STATE_VM(mp_emergency_exception_obj); } @@ -544,7 +544,7 @@ mp_obj_t mp_obj_new_exception_msg_vlist(const mp_obj_type_t *exc_type, mp_rom_er // CIRCUITPY-CHANGE: here and more below size_t o_str_alloc = decompress_length(fmt); if (gc_alloc_possible()) { - o_str = 
m_new_obj_maybe(mp_obj_str_t); + o_str = m_malloc_maybe_with_collect(sizeof(mp_obj_str_t)); o_str_buf = m_new_maybe(byte, o_str_alloc); } @@ -661,7 +661,7 @@ void mp_obj_exception_add_traceback(mp_obj_t self_in, qstr file, size_t line, qs // Try to allocate memory for the traceback, with fallback to emergency traceback object if (self->traceback == NULL || self->traceback == (mp_obj_traceback_t *)&mp_const_empty_traceback_obj) { - self->traceback = m_new_obj_maybe(mp_obj_traceback_t); + self->traceback = m_malloc_maybe_with_collect(sizeof(mp_obj_traceback_t)); if (self->traceback == NULL) { self->traceback = &MP_STATE_VM(mp_emergency_traceback_obj); } diff --git a/py/objfloat.c b/py/objfloat.c index 6f248cdadf..fa5a26b438 100644 --- a/py/objfloat.c +++ b/py/objfloat.c @@ -198,9 +198,8 @@ MP_DEFINE_CONST_OBJ_TYPE( #if MICROPY_OBJ_REPR != MICROPY_OBJ_REPR_C && MICROPY_OBJ_REPR != MICROPY_OBJ_REPR_D mp_obj_t mp_obj_new_float(mp_float_t value) { - // Don't use mp_obj_malloc here to avoid extra function call overhead. - mp_obj_float_t *o = m_new_obj(mp_obj_float_t); - o->base.type = &mp_type_float; + // CIRCUITPY-CHANGE: Use mp_obj_malloc because it is a Python object + mp_obj_float_t *o = mp_obj_malloc(mp_obj_float_t, &mp_type_float); o->value = value; return MP_OBJ_FROM_PTR(o); } diff --git a/py/objlist.c b/py/objlist.c index 2c1545d877..d0b6fd4b3e 100644 --- a/py/objlist.c +++ b/py/objlist.c @@ -504,12 +504,14 @@ void mp_obj_list_init(mp_obj_list_t *o, size_t n) { o->base.type = &mp_type_list; o->alloc = n < LIST_MIN_ALLOC ? 
LIST_MIN_ALLOC : n; o->len = n; - o->items = m_new(mp_obj_t, o->alloc); + // CIRCUITPY-CHANGE: Use m_malloc_items because these are mp_obj_t + o->items = m_malloc_items(o->alloc); mp_seq_clear(o->items, n, o->alloc, sizeof(*o->items)); } static mp_obj_list_t *list_new(size_t n) { - mp_obj_list_t *o = m_new_obj(mp_obj_list_t); + // CIRCUITPY-CHANGE: Use mp_obj_malloc because it is a Python object + mp_obj_list_t *o = mp_obj_malloc(mp_obj_list_t, &mp_type_list); mp_obj_list_init(o, n); return o; } diff --git a/py/objmap.c b/py/objmap.c index 1911a7510a..d8042f867c 100644 --- a/py/objmap.c +++ b/py/objmap.c @@ -50,7 +50,8 @@ static mp_obj_t map_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_ static mp_obj_t map_iternext(mp_obj_t self_in) { mp_check_self(mp_obj_is_type(self_in, &mp_type_map)); mp_obj_map_t *self = MP_OBJ_TO_PTR(self_in); - mp_obj_t *nextses = m_new(mp_obj_t, self->n_iters); + // CIRCUITPY-CHANGE: Use m_malloc_items because it is an array of objects + mp_obj_t *nextses = m_malloc_items(self->n_iters); for (size_t i = 0; i < self->n_iters; i++) { mp_obj_t next = mp_iternext(self->iters[i]); diff --git a/py/objmodule.c b/py/objmodule.c index 3ccd31b23a..a5c1dee968 100644 --- a/py/objmodule.c +++ b/py/objmodule.c @@ -134,8 +134,8 @@ mp_obj_t mp_obj_new_module(qstr module_name) { } // create new module object - mp_module_context_t *o = m_new_obj(mp_module_context_t); - o->module.base.type = &mp_type_module; + // CIRCUITPY-CHANGE: Use mp_obj_malloc because it is a Python object + mp_module_context_t *o = mp_obj_malloc(mp_module_context_t, &mp_type_module); o->module.globals = MP_OBJ_TO_PTR(mp_obj_new_dict(MICROPY_MODULE_DICT_SIZE)); // store __name__ entry in the module diff --git a/py/objproperty.c b/py/objproperty.c index a3a13a71bb..96563f6dba 100644 --- a/py/objproperty.c +++ b/py/objproperty.c @@ -57,7 +57,8 @@ static mp_obj_t property_make_new(const mp_obj_type_t *type, size_t n_args, size } static mp_obj_t property_getter(mp_obj_t 
self_in, mp_obj_t getter) { - mp_obj_property_t *p2 = m_new_obj(mp_obj_property_t); + // CIRCUITPY-CHANGE: Use mp_obj_malloc because it is a Python object + mp_obj_property_t *p2 = mp_obj_malloc(mp_obj_property_t, &mp_type_property); *p2 = *(mp_obj_property_t *)MP_OBJ_TO_PTR(self_in); p2->proxy[0] = getter; return MP_OBJ_FROM_PTR(p2); @@ -66,7 +67,8 @@ static mp_obj_t property_getter(mp_obj_t self_in, mp_obj_t getter) { static MP_DEFINE_CONST_FUN_OBJ_2(property_getter_obj, property_getter); static mp_obj_t property_setter(mp_obj_t self_in, mp_obj_t setter) { - mp_obj_property_t *p2 = m_new_obj(mp_obj_property_t); + // CIRCUITPY-CHANGE: Use mp_obj_malloc because it is a Python object + mp_obj_property_t *p2 = mp_obj_malloc(mp_obj_property_t, &mp_type_property); *p2 = *(mp_obj_property_t *)MP_OBJ_TO_PTR(self_in); p2->proxy[1] = setter; return MP_OBJ_FROM_PTR(p2); @@ -75,7 +77,8 @@ static mp_obj_t property_setter(mp_obj_t self_in, mp_obj_t setter) { static MP_DEFINE_CONST_FUN_OBJ_2(property_setter_obj, property_setter); static mp_obj_t property_deleter(mp_obj_t self_in, mp_obj_t deleter) { - mp_obj_property_t *p2 = m_new_obj(mp_obj_property_t); + // CIRCUITPY-CHANGE: Use mp_obj_malloc because it is a Python object + mp_obj_property_t *p2 = mp_obj_malloc(mp_obj_property_t, &mp_type_property); *p2 = *(mp_obj_property_t *)MP_OBJ_TO_PTR(self_in); p2->proxy[2] = deleter; return MP_OBJ_FROM_PTR(p2); diff --git a/py/objtuple.c b/py/objtuple.c index ec1545abb8..3d4bea7640 100644 --- a/py/objtuple.c +++ b/py/objtuple.c @@ -86,7 +86,7 @@ static mp_obj_t mp_obj_tuple_make_new(const mp_obj_type_t *type_in, size_t n_arg size_t alloc = 4; size_t len = 0; - mp_obj_t *items = m_new(mp_obj_t, alloc); + mp_obj_t *items = m_malloc_items(alloc); mp_obj_t iterable = mp_getiter(args[0], NULL); mp_obj_t item; diff --git a/py/objtype.c b/py/objtype.c index c2ef10ee9e..9c77539979 100644 --- a/py/objtype.c +++ b/py/objtype.c @@ -96,7 +96,7 @@ static mp_obj_t native_base_init_wrapper(size_t 
n_args, const mp_obj_t *pos_args pos_args++; n_args--; - mp_obj_t *args2 = m_new(mp_obj_t, n_args + 2 * n_kw); + mp_obj_t *args2 = m_malloc_items(n_args + 2 * n_kw); // copy in args memcpy(args2, pos_args, n_args * sizeof(mp_obj_t)); // copy in kwargs @@ -340,7 +340,7 @@ static mp_obj_t mp_obj_instance_make_new(const mp_obj_type_t *self, size_t n_arg mp_obj_t args2[1] = {MP_OBJ_FROM_PTR(self)}; new_ret = mp_call_function_n_kw(init_fn[0], 1, 0, args2); } else { - mp_obj_t *args2 = m_new(mp_obj_t, 1 + n_args + 2 * n_kw); + mp_obj_t *args2 = m_malloc_items(1 + n_args + 2 * n_kw); args2[0] = MP_OBJ_FROM_PTR(self); memcpy(args2 + 1, args, (n_args + 2 * n_kw) * sizeof(mp_obj_t)); new_ret = mp_call_function_n_kw(init_fn[0], n_args + 1, n_kw, args2); @@ -371,7 +371,7 @@ static mp_obj_t mp_obj_instance_make_new(const mp_obj_type_t *self, size_t n_arg if (n_args == 0 && n_kw == 0) { init_ret = mp_call_method_n_kw(0, 0, init_fn); } else { - mp_obj_t *args2 = m_new(mp_obj_t, 2 + n_args + 2 * n_kw); + mp_obj_t *args2 = m_malloc_items(2 + n_args + 2 * n_kw); args2[0] = init_fn[0]; args2[1] = init_fn[1]; memcpy(args2 + 2, args, (n_args + 2 * n_kw) * sizeof(mp_obj_t)); @@ -1513,13 +1513,14 @@ mp_obj_t mp_obj_cast_to_native_base(mp_obj_t self_in, mp_const_obj_t native_type /******************************************************************************/ // staticmethod and classmethod types (probably should go in a different file) -static mp_obj_t static_class_method_make_new(const mp_obj_type_t *self, size_t n_args, size_t n_kw, const mp_obj_t *args) { - assert(self == &mp_type_staticmethod || self == &mp_type_classmethod); +static mp_obj_t static_class_method_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *args) { + assert(type == &mp_type_staticmethod || type == &mp_type_classmethod); mp_arg_check_num(n_args, n_kw, 1, 1, false); - mp_obj_static_class_method_t *o = m_new_obj(mp_obj_static_class_method_t); - *o = (mp_obj_static_class_method_t) 
{{self}, args[0]}; + // CIRCUITPY-CHANGE: Use mp_obj_malloc because it is a Python object + mp_obj_static_class_method_t *o = mp_obj_malloc(mp_obj_static_class_method_t, type); + o->fun = args[0]; return MP_OBJ_FROM_PTR(o); } diff --git a/py/parse.c b/py/parse.c index 9721532afd..b48e1e0d05 100644 --- a/py/parse.c +++ b/py/parse.c @@ -286,7 +286,7 @@ static void *parser_alloc(parser_t *parser, size_t num_bytes) { if (alloc < num_bytes) { alloc = num_bytes; } - chunk = (mp_parse_chunk_t *)m_new(byte, sizeof(mp_parse_chunk_t) + alloc); + chunk = (mp_parse_chunk_t *)m_malloc_with_collect(sizeof(mp_parse_chunk_t) + alloc); chunk->alloc = alloc; chunk->union_.used = 0; parser->cur_chunk = chunk; @@ -1055,7 +1055,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { parser.result_stack_top = 0; parser.result_stack = NULL; while (parser.result_stack_alloc > 1) { - parser.result_stack = m_new_maybe(mp_parse_node_t, parser.result_stack_alloc); + parser.result_stack = m_malloc_maybe_with_collect(sizeof(mp_parse_node_t) * parser.result_stack_alloc); if (parser.result_stack != NULL) { break; } else { diff --git a/py/persistentcode.c b/py/persistentcode.c index 09beeef451..bdc31f3cd4 100644 --- a/py/persistentcode.c +++ b/py/persistentcode.c @@ -318,7 +318,8 @@ static mp_raw_code_t *load_raw_code(mp_reader_t *reader, mp_module_context_t *co // Load children if any. 
if (has_children) { n_children = read_uint(reader); - children = m_new(mp_raw_code_t *, n_children + (kind == MP_CODE_NATIVE_PY)); + // CIRCUITPY-CHANGE: Collect children pointers + children = m_malloc_with_collect(sizeof(mp_raw_code_t *) * (n_children + (kind == MP_CODE_NATIVE_PY))); for (size_t i = 0; i < n_children; ++i) { children[i] = load_raw_code(reader, context); } diff --git a/py/pystack.h b/py/pystack.h index ea8fddcf2f..64a9468a43 100644 --- a/py/pystack.h +++ b/py/pystack.h @@ -82,7 +82,8 @@ static inline void mp_local_free(void *ptr) { } static inline void *mp_nonlocal_alloc(size_t n_bytes) { - return m_new(uint8_t, n_bytes); + // CIRCUITPY-CHANGE: Collect the allocated memory because it holds function arguments. + return m_malloc_with_collect(n_bytes); } static inline void *mp_nonlocal_realloc(void *ptr, size_t old_n_bytes, size_t new_n_bytes) { diff --git a/py/qstr.c b/py/qstr.c index 6a5896bdaf..ac2ff23fd3 100644 --- a/py/qstr.c +++ b/py/qstr.c @@ -250,7 +250,8 @@ static qstr qstr_add(mp_uint_t len, const char *q_ptr) { + sizeof(qstr_hash_t) #endif + sizeof(qstr_len_t)) * new_alloc; - qstr_pool_t *pool = (qstr_pool_t *)m_malloc_maybe(pool_size); + // CIRCUITPY-CHANGE: Use m_malloc_helper because pools reference previous pools + qstr_pool_t *pool = (qstr_pool_t *)m_malloc_helper(pool_size, M_MALLOC_COLLECT); if (pool == NULL) { // Keep qstr_last_chunk consistent with qstr_pool_t: qstr_last_chunk is not scanned // at garbage collection since it's reachable from a qstr_pool_t. 
And the caller of diff --git a/py/runtime.c b/py/runtime.c index 29fcd04490..b2902ed8d3 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -1387,7 +1387,8 @@ mp_obj_t mp_getiter(mp_obj_t o_in, mp_obj_iter_buf_t *iter_buf) { if (iter_buf == NULL && MP_OBJ_TYPE_GET_SLOT(type, iter) != mp_obj_instance_getiter) { // if caller did not provide a buffer then allocate one on the heap // mp_obj_instance_getiter is special, it will allocate only if needed - iter_buf = m_new_obj(mp_obj_iter_buf_t); + // CIRCUITPY-CHANGE: Use m_new_struct_with_collect because iters reference the iternext function + iter_buf = m_new_struct_with_collect(mp_obj_iter_buf_t, 1); } mp_getiter_fun_t getiter; if (type->flags & MP_TYPE_FLAG_ITER_IS_CUSTOM) { @@ -1408,7 +1409,8 @@ mp_obj_t mp_getiter(mp_obj_t o_in, mp_obj_iter_buf_t *iter_buf) { // __getitem__ exists, create and return an iterator if (iter_buf == NULL) { // if caller did not provide a buffer then allocate one on the heap - iter_buf = m_new_obj(mp_obj_iter_buf_t); + // CIRCUITPY-CHANGE: Use m_new_struct_with_collect because iters reference the iternext function + iter_buf = m_new_struct_with_collect(mp_obj_iter_buf_t, 1); } return mp_obj_new_getitem_iter(dest, iter_buf); } diff --git a/py/scope.c b/py/scope.c index 4893e7cc4e..c6ce05d6d8 100644 --- a/py/scope.c +++ b/py/scope.c @@ -51,7 +51,7 @@ scope_t *scope_new(scope_kind_t kind, mp_parse_node_t pn, mp_uint_t emit_options MP_STATIC_ASSERT(MP_QSTR__lt_setcomp_gt_ <= UINT8_MAX); MP_STATIC_ASSERT(MP_QSTR__lt_genexpr_gt_ <= UINT8_MAX); - scope_t *scope = m_new0(scope_t, 1); + scope_t *scope = m_new_struct_with_collect(scope_t, 1); scope->kind = kind; scope->pn = pn; if (kind == SCOPE_FUNCTION || kind == SCOPE_CLASS) { diff --git a/shared-bindings/displayio/Group.c b/shared-bindings/displayio/Group.c index e5f5f03978..94ee2e58de 100644 --- a/shared-bindings/displayio/Group.c +++ b/shared-bindings/displayio/Group.c @@ -317,12 +317,14 @@ static mp_obj_t group_subscr(mp_obj_t self_in, mp_obj_t 
index_obj, mp_obj_t valu //| static mp_obj_t displayio_group_obj_sort(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) { displayio_group_t *self = native_group(pos_args[0]); - mp_obj_t *args = m_new(mp_obj_t, n_args); + mp_obj_t *args = m_malloc_items(n_args); for (size_t i = 1; i < n_args; ++i) { args[i] = pos_args[i]; } args[0] = MP_OBJ_FROM_PTR(self->members); - return mp_obj_list_sort(n_args, args, kw_args); + mp_obj_t res = mp_obj_list_sort(n_args, args, kw_args); + m_del(mp_obj_t, args, n_args); + return res; } MP_DEFINE_CONST_FUN_OBJ_KW(displayio_group_sort_obj, 1, displayio_group_obj_sort); diff --git a/shared-module/fontio/BuiltinFont.c b/shared-module/fontio/BuiltinFont.c index e24fe9c7f3..03ef933c55 100644 --- a/shared-module/fontio/BuiltinFont.c +++ b/shared-module/fontio/BuiltinFont.c @@ -16,7 +16,8 @@ mp_obj_t common_hal_fontio_builtinfont_get_bitmap(const fontio_builtinfont_t *se } mp_obj_t common_hal_fontio_builtinfont_get_bounding_box(const fontio_builtinfont_t *self) { - mp_obj_t *items = m_new(mp_obj_t, 2); + // Stack allocation is ok because tuple copies the values out. + mp_obj_t items[2]; items[0] = MP_OBJ_NEW_SMALL_INT(self->width); items[1] = MP_OBJ_NEW_SMALL_INT(self->height); return mp_obj_new_tuple(2, items);