fix module loops

add modulo operator
2025-07-28 20:23:43 +02:00 · 2025-07-27 12:16:43 +02:00
6 changed files with 199 additions and 2 deletions
--- a/code/ndarray.c
+++ b/code/ndarray.c
@ -1703,6 +1703,12 @@ mp_obj_t ndarray_binary_op(mp_binary_op_t _op, mp_obj_t lobj, mp_obj_t robj) {
            return ndarray_binary_add(lhs, rhs, ndim, shape, lstrides, rstrides);
            break;
        #endif
+        #if NDARRAY_HAS_BINARY_OP_MODULO
+        case MP_BINARY_OP_MODULO:
+            COMPLEX_DTYPE_NOT_IMPLEMENTED(lhs->dtype);
+            return ndarray_binary_modulo(lhs, rhs, ndim, shape, lstrides, rstrides);
+            break;
+        #endif
        #if NDARRAY_HAS_BINARY_OP_MULTIPLY
        case MP_BINARY_OP_MULTIPLY:
            return ndarray_binary_multiply(lhs, rhs, ndim, shape, lstrides, rstrides);
--- a/code/ndarray_operators.c
+++ b/code/ndarray_operators.c
@ -248,6 +248,105 @@ mp_obj_t ndarray_binary_add(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
 }
 #endif /* NDARRAY_HAS_BINARY_OP_ADD */

+#if NDARRAY_HAS_BINARY_OP_MODULO
+/*
+ * Element-wise modulo (lhs % rhs) of two ndarrays.
+ *
+ * lhs, rhs  - operands; their dtype selects the loop that is run
+ * ndim      - number of dimensions of the result
+ * shape     - shape of the result
+ * lstrides,
+ * rstrides  - per-axis strides used to walk lhs->array and rhs->array
+ *
+ * Returns a newly allocated dense ndarray wrapped with MP_OBJ_FROM_PTR.
+ * Integer/integer combinations go through BINARY_LOOP with the C `%`
+ * operator; every combination involving NDARRAY_FLOAT goes through
+ * MODULO_FLOAT_LOOP, which calls fmod.
+ *
+ * Result dtype by operand pair (symmetric except where noted by the code):
+ *   uint8/uint8 -> uint8      int8/int8   -> int8
+ *   uint8/int8  -> int16      int8/uint16 -> int16
+ *   uint8/uint16 -> uint16    uint16/uint16 -> uint16
+ *   {u}int8/int16, int16/int16 -> int16
+ *   uint16/int8, uint16/int16, int16/uint16 -> float
+ *   anything/float -> float
+ *
+ * NOTE(review): if neither operand pair matches (any dtype other than the
+ * five handled here), `results` stays NULL and MP_OBJ_FROM_PTR(NULL) is
+ * returned - confirm that callers filter such dtypes beforehand.
+ * NOTE(review): the integer paths evaluate `a % b` in C, so a zero divisor
+ * is undefined behaviour, and the sign of the result follows the dividend
+ * (truncation), not the divisor as Python's `%` does - confirm this is the
+ * intended contract.
+ */
+mp_obj_t ndarray_binary_modulo(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
+                                        uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
+
+    ndarray_obj_t *results = NULL;
+    uint8_t *larray = (uint8_t *)lhs->array;
+    uint8_t *rarray = (uint8_t *)rhs->array;
+
+    if(lhs->dtype == NDARRAY_UINT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT8);
+            BINARY_LOOP(results, uint8_t, uint8_t, uint8_t, larray, lstrides, rarray, rstrides, %);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int8_t, larray, lstrides, rarray, rstrides, %);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint8_t, uint16_t, larray, lstrides, rarray, rstrides, %);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, uint8_t, int16_t, larray, lstrides, rarray, rstrides, %);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            MODULO_FLOAT_LOOP(results, mp_float_t, uint8_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_INT8) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int8_t, uint8_t, larray, lstrides, rarray, rstrides, %);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT8);
+            BINARY_LOOP(results, int8_t, int8_t, int8_t, larray, lstrides, rarray, rstrides, %);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            /* rhs stores uint16_t elements, so they must be read as unsigned;
+             * |int8 % x| never exceeds 128, hence int16 output is wide enough */
+            BINARY_LOOP(results, int16_t, int8_t, uint16_t, larray, lstrides, rarray, rstrides, %);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int8_t, int16_t, larray, lstrides, rarray, rstrides, %);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            MODULO_FLOAT_LOOP(results, mp_float_t, int8_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_UINT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            /* the loop stores uint16_t elements, so the output array has to be
+             * allocated with the matching 16-bit dtype */
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint16_t, uint8_t, larray, lstrides, rarray, rstrides, %);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, uint16_t, int8_t, larray, lstrides, rarray, rstrides, %);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_UINT16);
+            BINARY_LOOP(results, uint16_t, uint16_t, uint16_t, larray, lstrides, rarray, rstrides, %);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, uint16_t, int16_t, larray, lstrides, rarray, rstrides, %);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            MODULO_FLOAT_LOOP(results, mp_float_t, uint16_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_INT16) {
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, uint8_t, larray, lstrides, rarray, rstrides, %);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, int8_t, larray, lstrides, rarray, rstrides, %);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            BINARY_LOOP(results, mp_float_t, int16_t, uint16_t, larray, lstrides, rarray, rstrides, %);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_INT16);
+            BINARY_LOOP(results, int16_t, int16_t, int16_t, larray, lstrides, rarray, rstrides, %);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            MODULO_FLOAT_LOOP(results, mp_float_t, int16_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    } else if(lhs->dtype == NDARRAY_FLOAT) {
+        /* a float dividend always yields a float result computed with fmod */
+        if(rhs->dtype == NDARRAY_UINT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            MODULO_FLOAT_LOOP(results, mp_float_t, mp_float_t, uint8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT8) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            MODULO_FLOAT_LOOP(results, mp_float_t, mp_float_t, int8_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_UINT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            MODULO_FLOAT_LOOP(results, mp_float_t, mp_float_t, uint16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_INT16) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            MODULO_FLOAT_LOOP(results, mp_float_t, mp_float_t, int16_t, larray, lstrides, rarray, rstrides);
+        } else if(rhs->dtype == NDARRAY_FLOAT) {
+            results = ndarray_new_dense_ndarray(ndim, shape, NDARRAY_FLOAT);
+            MODULO_FLOAT_LOOP(results, mp_float_t, mp_float_t, mp_float_t, larray, lstrides, rarray, rstrides);
+        }
+    }
+
+    return MP_OBJ_FROM_PTR(results);
+}
+#endif /* NDARRAY_HAS_BINARY_OP_MODULO */
+
 #if NDARRAY_HAS_BINARY_OP_MULTIPLY
 mp_obj_t ndarray_binary_multiply(ndarray_obj_t *lhs, ndarray_obj_t *rhs,
                                            uint8_t ndim, size_t *shape, int32_t *lstrides, int32_t *rstrides) {
--- a/code/ndarray_operators.h
+++ b/code/ndarray_operators.h
@ -12,6 +12,7 @@

 mp_obj_t ndarray_binary_equality(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *,  int32_t *, int32_t *, mp_binary_op_t );
 mp_obj_t ndarray_binary_add(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *);
+mp_obj_t ndarray_binary_modulo(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *);
 mp_obj_t ndarray_binary_multiply(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *);
 mp_obj_t ndarray_binary_more(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *, mp_binary_op_t );
 mp_obj_t ndarray_binary_power(ndarray_obj_t *, ndarray_obj_t *, uint8_t , size_t *, int32_t *, int32_t *);
@ -537,3 +538,90 @@ mp_obj_t ndarray_inplace_divide(ndarray_obj_t *, ndarray_obj_t *, int32_t *);
 } while(0)

 #endif /* ULAB_MAX_DIMS == 4 */
+
+/*
+ * Innermost-axis kernel of the floating point modulo.
+ *
+ * For each of the (results)->shape[ULAB_MAX_DIMS - 1] positions along the last
+ * axis, reads one type_left element from larray and one type_right element
+ * from rarray, and stores MICROPY_FLOAT_C_FUN(fmod)(left, right) through the
+ * output cursor `array`.
+ *
+ * Side effects on the caller's variables: `array` is post-incremented once per
+ * element, and larray/rarray are advanced by their last-axis stride on every
+ * iteration, so after the macro they point one stride past the last element
+ * visited. The enclosing MODULO_FLOAT_LOOP variants rely on this when they
+ * rewind the pointers.
+ *
+ * The body is a do/while, so it executes at least once even if the last-axis
+ * length is 0. The ({ ... }) wrapper gives the counter `l` its own scope, so
+ * it does not clash with an `l` declared by the expanding macro.
+ */
+#define MODULO_FLOAT1(results, array, type_left, type_right, larray, lstrides, rarray, rstrides)\
+({\
+    size_t l = 0;\
+    do {\
+        *(array)++ = MICROPY_FLOAT_C_FUN(fmod)(*((type_left *)(larray)), *((type_right *)(rarray)));\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 1];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 1]);\
+})
+
+#if ULAB_MAX_DIMS == 1
+/*
+ * 1-D driver of the floating point modulo: sets up the output cursor `array`
+ * as a type_out pointer into (results)->array and runs MODULO_FLOAT1 once over
+ * the only axis. larray and rarray are modified by the expansion.
+ */
+#define MODULO_FLOAT_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    MODULO_FLOAT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+} while(0)
+#endif /* ULAB_MAX_DIMS == 1 */
+
+#if ULAB_MAX_DIMS == 2
+/*
+ * 2-D driver of the floating point modulo.
+ *
+ * The outer do/while runs (results)->shape[ULAB_MAX_DIMS - 2] times (at least
+ * once). Each pass lets MODULO_FLOAT1 consume one run along the last axis,
+ * then rewinds larray/rarray by stride * length of the last axis (undoing the
+ * advance made inside MODULO_FLOAT1) and steps them by the stride of the
+ * second-to-last axis. The output cursor `array` is never rewound; it is
+ * written sequentially. larray and rarray are modified by the expansion.
+ */
+#define MODULO_FLOAT_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t l = 0;\
+    do {\
+        MODULO_FLOAT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+        l++;\
+    } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+} while(0)
+#endif /* ULAB_MAX_DIMS == 2 */
+
+#if ULAB_MAX_DIMS == 3
+/*
+ * 3-D driver of the floating point modulo.
+ *
+ * Loop nesting, outermost first: `k` over shape[ULAB_MAX_DIMS - 3], `l` over
+ * shape[ULAB_MAX_DIMS - 2], and MODULO_FLOAT1 over the last axis. Every loop
+ * is a do/while and therefore runs at least once. When a level finishes, the
+ * input pointers are rewound by stride * length of the axis just traversed
+ * and stepped by the stride of the next outer axis. The output cursor `array`
+ * only moves forward. larray and rarray are modified by the expansion.
+ */
+#define MODULO_FLOAT_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t k = 0;\
+    do {\
+        size_t l = 0;\
+        do {\
+            MODULO_FLOAT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+            l++;\
+        } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+        (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+        (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+        (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+        k++;\
+    } while(k < (results)->shape[ULAB_MAX_DIMS - 3]);\
+} while(0)
+#endif /* ULAB_MAX_DIMS == 3 */
+
+#if ULAB_MAX_DIMS == 4
+/*
+ * 4-D driver of the floating point modulo.
+ *
+ * Loop nesting, outermost first: `j` over shape[ULAB_MAX_DIMS - 4], `k` over
+ * shape[ULAB_MAX_DIMS - 3], `l` over shape[ULAB_MAX_DIMS - 2], and
+ * MODULO_FLOAT1 over the last axis. Every loop is a do/while and therefore
+ * runs at least once. When a level finishes, the input pointers are rewound
+ * by stride * length of the axis just traversed and stepped by the stride of
+ * the next outer axis. The output cursor `array` only moves forward.
+ * larray and rarray are modified by the expansion.
+ */
+#define MODULO_FLOAT_LOOP(results, type_out, type_left, type_right, larray, lstrides, rarray, rstrides) do {\
+    type_out *array = (type_out *)(results)->array;\
+    size_t j = 0;\
+    do {\
+        size_t k = 0;\
+        do {\
+            size_t l = 0;\
+            do {\
+                MODULO_FLOAT1((results), (array), type_left, type_right, (larray), (lstrides), (rarray), (rstrides));\
+                (larray) -= (lstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+                (larray) += (lstrides)[ULAB_MAX_DIMS - 2];\
+                (rarray) -= (rstrides)[ULAB_MAX_DIMS - 1] * (results)->shape[ULAB_MAX_DIMS - 1];\
+                (rarray) += (rstrides)[ULAB_MAX_DIMS - 2];\
+                l++;\
+            } while(l < (results)->shape[ULAB_MAX_DIMS - 2]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 3];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 2] * (results)->shape[ULAB_MAX_DIMS - 2];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 3];\
+            k++;\
+        } while(k < (results)->shape[ULAB_MAX_DIMS - 3]);\
+            (larray) -= (lstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+            (larray) += (lstrides)[ULAB_MAX_DIMS - 4];\
+            (rarray) -= (rstrides)[ULAB_MAX_DIMS - 3] * (results)->shape[ULAB_MAX_DIMS - 3];\
+            (rarray) += (rstrides)[ULAB_MAX_DIMS - 4];\
+        j++;\
+    } while(j < (results)->shape[ULAB_MAX_DIMS - 4]);\
+} while(0)
+#endif /* ULAB_MAX_DIMS == 4 */
--- a/code/ulab.c
+++ b/code/ulab.c
@ -33,7 +33,7 @@
 #include "user/user.h"
 #include "utils/utils.h"

-#define ULAB_VERSION 6.8.0
+#define ULAB_VERSION 6.9.0
 #define xstr(s) str(s)
 #define str(s) #s

--- a/code/ulab.h
+++ b/code/ulab.h
@ -117,6 +117,10 @@
 #define NDARRAY_HAS_BINARY_OP_LESS_EQUAL    (1)
 #endif

+/* Enables the element-wise `%` operator: gates the MP_BINARY_OP_MODULO case */
+/* in ndarray_binary_op and the definition of ndarray_binary_modulo. */
+/* Defaults to (1) unless it has already been defined. */
+#ifndef NDARRAY_HAS_BINARY_OP_MODULO
+#define NDARRAY_HAS_BINARY_OP_MODULO        (1)
+#endif
+
 #ifndef NDARRAY_HAS_BINARY_OP_MORE
 #define NDARRAY_HAS_BINARY_OP_MORE          (1)
 #endif
--- a/docs/ulab-ndarray.ipynb
+++ b/docs/ulab-ndarray.ipynb
@ -2599,7 +2599,7 @@
   "source": [
    "# Binary operators\n",
    "\n",
-    "`ulab` implements the `+`, `-`, `*`, `/`, `**`, `<`, `>`, `<=`, `>=`, `==`, `!=`, `+=`, `-=`, `*=`, `/=`, `**=`  binary operators, as well as the `AND`, `OR`, `XOR` bit-wise operators that work element-wise. Note that the bit-wise operators will raise an exception, if either of the operands is of `float` or `complex` type.\n",
+    "`ulab` implements the `+`, `-`, `*`, `/`, `**`, `%`, `<`, `>`, `<=`, `>=`, `==`, `!=`, `+=`, `-=`, `*=`, `/=`, `**=`  binary operators, as well as the `AND`, `OR`, `XOR` bit-wise operators that work element-wise. Note that the bit-wise operators will raise an exception, if either of the operands is of `float` or `complex` type.\n",
    "\n",
    "Broadcasting is available, meaning that the two operands do not even have to have the same shape. If the lengths along the respective axes are equal, or one of them is 1, or the axis is missing, the element-wise operation can still be carried out. \n",
    "A thorough explanation of broadcasting can be found under https://numpy.org/doc/stable/user/basics.broadcasting.html. \n",
Author	SHA1	Message	Date
Zoltán Vörös	9f71594f47	fix module loops	2025-07-28 20:23:43 +02:00
Zoltán Vörös	af706e5b46	add modulo operator	2025-07-27 12:16:43 +02:00