diff --git a/py/misc.h b/py/misc.h index e034485838..643f13a19f 100644 --- a/py/misc.h +++ b/py/misc.h @@ -428,7 +428,7 @@ static inline uint32_t mp_clz_mpi(mp_int_t x) { #endif } -// Overflow-checked operations for long long +// Overflow-checked operations // Integer overflow builtins were added to GCC 5, but __has_builtin only in GCC 10 // @@ -436,8 +436,28 @@ static inline uint32_t mp_clz_mpi(mp_int_t x) { // functions below don't update the result if an overflow would occur (to avoid UB). #define MP_GCC_HAS_BUILTIN_OVERFLOW (__GNUC__ >= 5) -#if __has_builtin(__builtin_umulll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW +// <limits.h> may not define these macros when gcc is in C++ mode. +#ifndef ULLONG_MAX +#define ULLONG_MAX (~0ULL) +#endif + +#ifndef LLONG_MAX +#define LLONG_MAX ((long long)(ULLONG_MAX >> 1)) +#endif + +#ifndef LLONG_MIN +#define LLONG_MIN (-LLONG_MAX - 1) +#endif + + +#if MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC #define mp_mul_ull_overflow __builtin_umulll_overflow +#define mp_mul_ll_overflow __builtin_smulll_overflow +inline static bool mp_mul_mp_int_t_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) { + // __builtin_mul_overflow is a type-generic function, this inline ensures the argument + // types are checked to match mp_int_t. 
+ return __builtin_mul_overflow(x, y, res); +} #else inline static bool mp_mul_ull_overflow(unsigned long long int x, unsigned long long int y, unsigned long long int *res) { if (y > 0 && x > (ULLONG_MAX / y)) { @@ -446,11 +466,7 @@ inline static bool mp_mul_ull_overflow(unsigned long long int x, unsigned long l *res = x * y; return false; } -#endif -#if __has_builtin(__builtin_smulll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW -#define mp_mul_ll_overflow __builtin_smulll_overflow -#else inline static bool mp_mul_ll_overflow(long long int x, long long int y, long long int *res) { bool overflow; @@ -475,6 +491,8 @@ inline static bool mp_mul_ll_overflow(long long int x, long long int y, long lon return overflow; } + +extern bool mp_mul_mp_int_t_overflow(mp_int_t x, mp_int_t y, mp_int_t *res); #endif #if __has_builtin(__builtin_saddll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW diff --git a/py/mpconfig.h b/py/mpconfig.h index 810dc83f32..9d7a3ee3dc 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -2279,19 +2279,19 @@ typedef time_t mp_timestamp_t; #endif // If true, use __builtin_mul_overflow (a gcc intrinsic supported by clang) for -// overflow checking when multiplying two small ints. Otherwise, use the -// routine mp_small_int_mul_overflow. +// overflow checking when multiplying two small ints. Otherwise, use a portable +// algorithm. // -// On MCUs with a 32x32->64 bit multiply instruction (such as Cortex M4, Cortex M33) -// this is likely to be faster and generate smaller code. +// Most MCUs have a 32x32->64 bit multiply instruction, in which case the +// intrinsic is likely to be faster and generate smaller code. The main exception is +// cortex-m0 with __ARM_ARCH_ISA_THUMB == 1. // -// The semantics of mp_small_int_mul_overflow. and__builtin_mul_overflow are not quite the -// same: mp_small_int_mul_overflow additionally checks that the result fits within a -// small integer, not just within mp_int_t. +// The intrinsic is in GCC from version 5. 
In principle it can be detected instead with +// __has_builtin except this is only in GCC from version 10. #ifndef MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC -#if defined(__ARM_ARCH_ISA_THUMB) && defined(__GNUC__) +#if defined(__ARM_ARCH_ISA_THUMB) && (__GNUC__ >= 5) #define MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC (__ARM_ARCH_ISA_THUMB >= 2) -#elif (defined(__riscv_m) || defined(__x86_64__) || defined(__i686__)) && defined(__GNUC__) +#elif (__GNUC__ >= 5) #define MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC (1) #else #define MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC (0) diff --git a/py/parsenum.c b/py/parsenum.c index 40b428956e..1b579eef5b 100644 --- a/py/parsenum.c +++ b/py/parsenum.c @@ -28,6 +28,7 @@ #include #include "py/runtime.h" +#include "py/misc.h" #include "py/parsenumbase.h" #include "py/parsenum.h" #include "py/smallint.h" @@ -55,7 +56,7 @@ typedef mp_int_t parsed_int_t; #if MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC #define PARSED_INT_MUL_OVERFLOW __builtin_mul_overflow #else -#define PARSED_INT_MUL_OVERFLOW mp_small_int_mul_overflow +#define PARSED_INT_MUL_OVERFLOW mp_mul_mp_int_t_overflow #endif #define PARSED_INT_FITS MP_SMALL_INT_FITS #else diff --git a/py/runtime.c b/py/runtime.c index 39e72f4a49..1a54a65de2 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -490,24 +490,8 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs case MP_BINARY_OP_MULTIPLY: case MP_BINARY_OP_INPLACE_MULTIPLY: { - // If long long type exists and is larger than mp_int_t, then - // we can use the following code to perform overflow-checked multiplication. - // Otherwise (eg in x64 case) we must use mp_small_int_mul_overflow. 
- #if 0 - // compute result using long long precision - long long res = (long long)lhs_val * (long long)rhs_val; - if (res > MP_SMALL_INT_MAX || res < MP_SMALL_INT_MIN) { - // result overflowed SMALL_INT, so return higher precision integer - return mp_obj_new_int_from_ll(res); - } else { - // use standard precision - lhs_val = (mp_int_t)res; - } - #endif - mp_int_t int_res; - #if MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC - if (__builtin_mul_overflow(lhs_val, rhs_val, &int_res)) { + if (mp_mul_mp_int_t_overflow(lhs_val, rhs_val, &int_res)) { lhs = mp_obj_new_int_from_ll(lhs_val); goto generic_binary_op; } else { @@ -515,16 +499,6 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs } break; // result fits in mp_int_t but might not be MP_SMALL_INT_FITS - #else - if (mp_small_int_mul_overflow(lhs_val, rhs_val, &int_res)) { - // use higher precision - lhs = mp_obj_new_int_from_ll(lhs_val); - goto generic_binary_op; - } else { - // use standard precision - return MP_OBJ_NEW_SMALL_INT(int_res); - } - #endif } case MP_BINARY_OP_FLOOR_DIVIDE: case MP_BINARY_OP_INPLACE_FLOOR_DIVIDE: @@ -564,30 +538,18 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs mp_int_t ans = 1; while (rhs_val > 0) { if (rhs_val & 1) { - #if MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC - if (__builtin_mul_overflow(ans, lhs_val, &ans)) { + if (mp_mul_mp_int_t_overflow(ans, lhs_val, &ans)) { goto power_overflow; } - #else - if (mp_small_int_mul_overflow(ans, lhs_val, &ans)) { - goto power_overflow; - } - #endif } if (rhs_val == 1) { break; } rhs_val /= 2; mp_int_t int_res; - #if MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC - if (__builtin_mul_overflow(lhs_val, lhs_val, &int_res)) { + if (mp_mul_mp_int_t_overflow(lhs_val, lhs_val, &int_res)) { goto power_overflow; } - #else - if (mp_small_int_mul_overflow(lhs_val, lhs_val, &int_res)) { - goto power_overflow; - } - #endif lhs_val = int_res; } lhs_val = ans; diff --git a/py/smallint.c b/py/smallint.c index 
a494093d61..219ea29e4d 100644 --- a/py/smallint.c +++ b/py/smallint.c @@ -26,25 +26,30 @@ #include "py/smallint.h" -bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) { +#if !MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC +#define MP_UINT_MAX (~(mp_uint_t)0) +#define MP_INT_MAX ((mp_int_t)(MP_UINT_MAX >> 1)) +#define MP_INT_MIN (-MP_INT_MAX - 1) + +bool mp_mul_mp_int_t_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) { // Check for multiply overflow; see CERT INT32-C if (x > 0) { // x is positive if (y > 0) { // x and y are positive - if (x > (MP_SMALL_INT_MAX / y)) { + if (x > (MP_INT_MAX / y)) { return true; } } else { // x positive, y nonpositive - if (y < (MP_SMALL_INT_MIN / x)) { + if (y < (MP_INT_MIN / x)) { return true; } } // x positive, y nonpositive } else { // x is nonpositive if (y > 0) { // x is nonpositive, y is positive - if (x < (MP_SMALL_INT_MIN / y)) { + if (x < (MP_INT_MIN / y)) { return true; } } else { // x and y are nonpositive - if (x != 0 && y < (MP_SMALL_INT_MAX / x)) { + if (x != 0 && y < (MP_INT_MAX / x)) { return true; } } // End if x and y are nonpositive @@ -54,6 +59,7 @@ bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) { *res = x * y; return false; } +#endif mp_int_t mp_small_int_modulo(mp_int_t dividend, mp_int_t divisor) { // Python specs require that mod has same sign as second operand diff --git a/py/smallint.h b/py/smallint.h index e50f98651e..ec5b0af3b2 100644 --- a/py/smallint.h +++ b/py/smallint.h @@ -68,10 +68,6 @@ // The number of bits in a MP_SMALL_INT including the sign bit. #define MP_SMALL_INT_BITS (MP_IMAX_BITS(MP_SMALL_INT_MAX) + 1) -// Multiply two small ints. -// If returns false, the correct result is stored in 'res' -// If returns true, the multiplication would have overflowed. 'res' is unchanged. 
-bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y, mp_int_t *res); mp_int_t mp_small_int_modulo(mp_int_t dividend, mp_int_t divisor); mp_int_t mp_small_int_floor_divide(mp_int_t num, mp_int_t denom);