py/misc: Add a popcount(uint32_t) implementation.

This makes the existing popcount(uint32_t) implementation found in the
RV32 emitter available to the rest of the codebase.  This version of
popcount will use intrinsic or builtin implementations if they are
available, falling back to a generic implementation if that is not the
case.

Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
This commit is contained in:
Alessandro Gatti 2024-08-22 00:48:55 +02:00
parent a5270c84cf
commit 3044233ea3
2 changed files with 19 additions and 22 deletions

View file

@ -29,6 +29,7 @@
#include <string.h>
#include "py/emit.h"
#include "py/misc.h"
#include "py/mpconfig.h"
// wrapper around everything in this file
@ -43,27 +44,6 @@
#define DEBUG_printf(...) (void)0
#endif
#ifndef MP_POPCOUNT
#ifdef _MSC_VER
#include <intrin.h>
#define MP_POPCOUNT __popcnt
#else
#if defined __has_builtin
#if __has_builtin(__builtin_popcount)
#define MP_POPCOUNT __builtin_popcount
#endif
#else
// Portable population count: returns the number of bits set in `value`
// (a result in the range 0..32) without relying on compiler intrinsics.
static uint32_t fallback_popcount(uint32_t value) {
    // Collapse each pair of bits into a 2-bit count of set bits.
    value = value - ((value >> 1) & 0x55555555);
    // Add neighbouring 2-bit counts into 4-bit counts.
    value = (value & 0x33333333) + ((value >> 2) & 0x33333333);
    // Add neighbouring 4-bit counts, leaving one count per byte.
    value = (value + (value >> 4)) & 0x0F0F0F0F;
    // Multiplying by 0x01010101 accumulates all four byte counts in the most
    // significant byte; shift it down so that only the total is returned.
    return (value * 0x01010101) >> 24;
}
#define MP_POPCOUNT fallback_popcount
#endif
#endif
#endif
#define INTERNAL_TEMPORARY ASM_RV32_REG_S0
#define AVAILABLE_REGISTERS_COUNT 32
@ -249,7 +229,7 @@ static void adjust_stack(asm_rv32_t *state, mp_int_t stack_size) {
// stack to hold all the tainted registers and an arbitrary amount of space
// for locals.
static void emit_function_prologue(asm_rv32_t *state, mp_uint_t registers) {
mp_uint_t registers_count = MP_POPCOUNT(registers);
mp_uint_t registers_count = mp_popcount(registers);
state->stack_size = (registers_count + state->locals_count) * sizeof(uint32_t);
mp_uint_t old_saved_registers_mask = state->saved_registers_mask;
// Move stack pointer up.

View file

@ -370,12 +370,29 @@ static inline uint32_t mp_ctz(uint32_t x) {
// Identity helper: hands the given boolean back to the caller unchanged.
static inline bool mp_check(bool value) {
    const bool outcome = value;
    return outcome;
}
// Population count of x, delegated entirely to the __popcnt compiler
// intrinsic.
static inline uint32_t mp_popcount(uint32_t x) {
    const uint32_t set_bits = __popcnt(x);
    return set_bits;
}
#else
// Bit-scanning helpers, each mapped directly onto the compiler builtin of
// the corresponding name.
#define mp_clz(x) __builtin_clz(x)
#define mp_clzl(x) __builtin_clzl(x)
#define mp_clzll(x) __builtin_clzll(x)
#define mp_ctz(x) __builtin_ctz(x)
// In this branch mp_check() simply evaluates to its argument.
#define mp_check(x) (x)
// Map mp_popcount onto the compiler builtin only when __has_builtin exists
// and confirms that __builtin_popcount is available; otherwise mp_popcount
// is left undefined by this section.
#if defined __has_builtin
#if __has_builtin(__builtin_popcount)
#define mp_popcount(x) __builtin_popcount(x)
#endif
#endif
#if !defined(mp_popcount)
// Generic population count: returns the number of bits set in `x` (a result
// in the range 0..32) using only shifts, masks and one multiplication.
static inline uint32_t mp_popcount(uint32_t x) {
    // Collapse each pair of bits into a 2-bit count of set bits.
    x = x - ((x >> 1) & 0x55555555);
    // Add neighbouring 2-bit counts into 4-bit counts.
    x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
    // Add neighbouring 4-bit counts, leaving one count per byte.
    x = (x + (x >> 4)) & 0x0F0F0F0F;
    // Multiplying by 0x01010101 accumulates all four byte counts in the most
    // significant byte; shift it down so that only the total is returned.
    return (x * 0x01010101) >> 24;
}
#endif
#endif
// mp_int_t can be larger than long, i.e. Windows 64-bit, nan-box variants