diff --git a/py/misc.h b/py/misc.h
index e034485838..643f13a19f 100644
--- a/py/misc.h
+++ b/py/misc.h
@@ -428,7 +428,7 @@ static inline uint32_t mp_clz_mpi(mp_int_t x) {
     #endif
 }
 
-// Overflow-checked operations for long long
+// Overflow-checked operations
 
 // Integer overflow builtins were added to GCC 5, but __has_builtin only in GCC 10
 //
@@ -436,8 +436,28 @@ static inline uint32_t mp_clz_mpi(mp_int_t x) {
 // functions below don't update the result if an overflow would occur (to avoid UB).
 #define MP_GCC_HAS_BUILTIN_OVERFLOW (__GNUC__ >= 5)
 
-#if __has_builtin(__builtin_umulll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW
+// Fallbacks: <limits.h> may not define these macros when gcc is in C++ mode.
+#ifndef ULLONG_MAX
+#define ULLONG_MAX (~0ULL)
+#endif
+
+#ifndef LLONG_MAX
+#define LLONG_MAX ((long long)(ULLONG_MAX >> 1))
+#endif
+
+#ifndef LLONG_MIN
+#define LLONG_MIN (-LLONG_MAX - 1)
+#endif
+
+
+#if MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC
 #define mp_mul_ull_overflow __builtin_umulll_overflow
+#define mp_mul_ll_overflow __builtin_smulll_overflow
+inline static bool mp_mul_mp_int_t_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) {
+    // __builtin_mul_overflow is type-generic, so wrap it in this inline function to
+    // ensure the argument types are checked against mp_int_t.
+    return __builtin_mul_overflow(x, y, res);
+}
 #else
 inline static bool mp_mul_ull_overflow(unsigned long long int x, unsigned long long int y, unsigned long long int *res) {
     if (y > 0 && x > (ULLONG_MAX / y)) {
@@ -446,11 +466,7 @@ inline static bool mp_mul_ull_overflow(unsigned long long int x, unsigned long l
     *res = x * y;
     return false;
 }
-#endif
 
-#if __has_builtin(__builtin_smulll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW
-#define mp_mul_ll_overflow __builtin_smulll_overflow
-#else
 inline static bool mp_mul_ll_overflow(long long int x, long long int y, long long int *res) {
     bool overflow;
 
@@ -475,6 +491,8 @@ inline static bool mp_mul_ll_overflow(long long int x, long long int y, long lon
 
     return overflow;
 }
+
+extern bool mp_mul_mp_int_t_overflow(mp_int_t x, mp_int_t y, mp_int_t *res);
 #endif
 
 #if __has_builtin(__builtin_saddll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW
diff --git a/py/mpconfig.h b/py/mpconfig.h
index 810dc83f32..9d7a3ee3dc 100644
--- a/py/mpconfig.h
+++ b/py/mpconfig.h
@@ -2279,19 +2279,19 @@ typedef time_t mp_timestamp_t;
 #endif
 
 // If true, use __builtin_mul_overflow (a gcc intrinsic supported by clang) for
-// overflow checking when multiplying two small ints. Otherwise, use the
-// routine mp_small_int_mul_overflow.
+// overflow checking when multiplying two small ints. Otherwise, use a portable
+// algorithm.
 //
-// On MCUs with a 32x32->64 bit multiply instruction (such as Cortex M4, Cortex M33)
-// this is likely to be faster and generate smaller code.
+// Most MCUs have a 32x32->64 bit multiply instruction, in which case the
+// intrinsic is likely to be faster and generate smaller code. The main exception is
+// Cortex-M0 with __ARM_ARCH_ISA_THUMB == 1.
 //
-// The semantics of mp_small_int_mul_overflow. and__builtin_mul_overflow are not quite the
-// same: mp_small_int_mul_overflow additionally checks that the result fits within a
-// small integer, not just within mp_int_t.
+// The intrinsic is in GCC from version 5. In principle it could be detected with
+// __has_builtin instead, except that __has_builtin is only in GCC from version 10.
 #ifndef MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC
-#if defined(__ARM_ARCH_ISA_THUMB) && defined(__GNUC__)
+#if defined(__ARM_ARCH_ISA_THUMB) && (__GNUC__ >= 5)
 #define MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC (__ARM_ARCH_ISA_THUMB >= 2)
-#elif (defined(__riscv_m) || defined(__x86_64__) || defined(__i686__)) && defined(__GNUC__)
+#elif (__GNUC__ >= 5)
 #define MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC (1)
 #else
 #define MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC (0)
diff --git a/py/parsenum.c b/py/parsenum.c
index 40b428956e..1b579eef5b 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -28,6 +28,7 @@
 #include <stdlib.h>
 
 #include "py/runtime.h"
+#include "py/misc.h"
 #include "py/parsenumbase.h"
 #include "py/parsenum.h"
 #include "py/smallint.h"
@@ -55,7 +56,7 @@ typedef mp_int_t parsed_int_t;
 #if MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC
 #define PARSED_INT_MUL_OVERFLOW __builtin_mul_overflow
 #else
-#define PARSED_INT_MUL_OVERFLOW mp_small_int_mul_overflow
+#define PARSED_INT_MUL_OVERFLOW mp_mul_mp_int_t_overflow
 #endif
 #define PARSED_INT_FITS MP_SMALL_INT_FITS
 #else
diff --git a/py/runtime.c b/py/runtime.c
index 39e72f4a49..1a54a65de2 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -490,24 +490,8 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs
                 case MP_BINARY_OP_MULTIPLY:
                 case MP_BINARY_OP_INPLACE_MULTIPLY: {
 
-                    // If long long type exists and is larger than mp_int_t, then
-                    // we can use the following code to perform overflow-checked multiplication.
-                    // Otherwise (eg in x64 case) we must use mp_small_int_mul_overflow.
-                    #if 0
-                    // compute result using long long precision
-                    long long res = (long long)lhs_val * (long long)rhs_val;
-                    if (res > MP_SMALL_INT_MAX || res < MP_SMALL_INT_MIN) {
-                        // result overflowed SMALL_INT, so return higher precision integer
-                        return mp_obj_new_int_from_ll(res);
-                    } else {
-                        // use standard precision
-                        lhs_val = (mp_int_t)res;
-                    }
-                    #endif
-
                     mp_int_t int_res;
-                    #if MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC
-                    if (__builtin_mul_overflow(lhs_val, rhs_val, &int_res)) {
+                    if (mp_mul_mp_int_t_overflow(lhs_val, rhs_val, &int_res)) {
                         lhs = mp_obj_new_int_from_ll(lhs_val);
                         goto generic_binary_op;
                     } else {
@@ -515,16 +499,6 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs
                     }
 
                     break; // result fits in mp_int_t but might not be MP_SMALL_INT_FITS
-                    #else
-                    if (mp_small_int_mul_overflow(lhs_val, rhs_val, &int_res)) {
-                        // use higher precision
-                        lhs = mp_obj_new_int_from_ll(lhs_val);
-                        goto generic_binary_op;
-                    } else {
-                        // use standard precision
-                        return MP_OBJ_NEW_SMALL_INT(int_res);
-                    }
-                    #endif
                 }
                 case MP_BINARY_OP_FLOOR_DIVIDE:
                 case MP_BINARY_OP_INPLACE_FLOOR_DIVIDE:
@@ -564,30 +538,18 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs
                         mp_int_t ans = 1;
                         while (rhs_val > 0) {
                             if (rhs_val & 1) {
-                                #if MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC
-                                if (__builtin_mul_overflow(ans, lhs_val, &ans)) {
+                                if (mp_mul_mp_int_t_overflow(ans, lhs_val, &ans)) {
                                     goto power_overflow;
                                 }
-                                #else
-                                if (mp_small_int_mul_overflow(ans, lhs_val, &ans)) {
-                                    goto power_overflow;
-                                }
-                                #endif
                             }
                             if (rhs_val == 1) {
                                 break;
                             }
                             rhs_val /= 2;
                             mp_int_t int_res;
-                            #if MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC
-                            if (__builtin_mul_overflow(lhs_val, lhs_val, &int_res)) {
+                            if (mp_mul_mp_int_t_overflow(lhs_val, lhs_val, &int_res)) {
                                 goto power_overflow;
                             }
-                            #else
-                            if (mp_small_int_mul_overflow(lhs_val, lhs_val, &int_res)) {
-                                goto power_overflow;
-                            }
-                            #endif
                             lhs_val = int_res;
                         }
                         lhs_val = ans;
diff --git a/py/smallint.c b/py/smallint.c
index a494093d61..219ea29e4d 100644
--- a/py/smallint.c
+++ b/py/smallint.c
@@ -26,25 +26,30 @@
 
 #include "py/smallint.h"
 
-bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) {
+#if !MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC
+#define MP_UINT_MAX (~(mp_uint_t)0)
+#define MP_INT_MAX ((mp_int_t)(MP_UINT_MAX >> 1))
+#define MP_INT_MIN (-MP_INT_MAX - 1)
+
+bool mp_mul_mp_int_t_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) {
     // Check for multiply overflow; see CERT INT32-C
     if (x > 0) { // x is positive
         if (y > 0) { // x and y are positive
-            if (x > (MP_SMALL_INT_MAX / y)) {
+            if (x > (MP_INT_MAX / y)) {
                 return true;
             }
         } else { // x positive, y nonpositive
-            if (y < (MP_SMALL_INT_MIN / x)) {
+            if (y < (MP_INT_MIN / x)) {
                 return true;
             }
         } // x positive, y nonpositive
     } else { // x is nonpositive
         if (y > 0) { // x is nonpositive, y is positive
-            if (x < (MP_SMALL_INT_MIN / y)) {
+            if (x < (MP_INT_MIN / y)) {
                 return true;
             }
         } else { // x and y are nonpositive
-            if (x != 0 && y < (MP_SMALL_INT_MAX / x)) {
+            if (x != 0 && y < (MP_INT_MAX / x)) {
                 return true;
             }
         } // End if x and y are nonpositive
@@ -54,6 +59,7 @@ bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) {
     *res = x * y;
     return false;
 }
+#endif
 
 mp_int_t mp_small_int_modulo(mp_int_t dividend, mp_int_t divisor) {
     // Python specs require that mod has same sign as second operand
diff --git a/py/smallint.h b/py/smallint.h
index e50f98651e..ec5b0af3b2 100644
--- a/py/smallint.h
+++ b/py/smallint.h
@@ -68,10 +68,6 @@
 // The number of bits in a MP_SMALL_INT including the sign bit.
 #define MP_SMALL_INT_BITS (MP_IMAX_BITS(MP_SMALL_INT_MAX) + 1)
 
-// Multiply two small ints.
-// If returns false, the correct result is stored in 'res'
-// If returns true, the multiplication would have overflowed. 'res' is unchanged.
-bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y, mp_int_t *res);
 mp_int_t mp_small_int_modulo(mp_int_t dividend, mp_int_t divisor);
 mp_int_t mp_small_int_floor_divide(mp_int_t num, mp_int_t denom);