From a0dcd1ebab6a8908292e5939c1ceb0ae78f3a4db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Sat, 8 Jan 2022 20:27:02 +0100
Subject: [PATCH 01/20] implement numpy.save

---
 code/micropython.mk     |   1 +
 code/numpy/io/io.c      | 163 ++++++++++++++++++++++++++++++++++++++++
 code/numpy/io/io.h      |  16 ++++
 code/numpy/numpy.c      |   4 +
 code/ulab.c             |   2 +-
 code/ulab.h             |   4 +
 docs/ulab-change-log.md |   6 ++
 7 files changed, 195 insertions(+), 1 deletion(-)
 create mode 100644 code/numpy/io/io.c
 create mode 100644 code/numpy/io/io.h

diff --git a/code/micropython.mk b/code/micropython.mk
index d16b177..f36d1d6 100644
--- a/code/micropython.mk
+++ b/code/micropython.mk
@@ -19,6 +19,7 @@ SRC_USERMOD += $(USERMODULES_DIR)/numpy/create.c
 SRC_USERMOD += $(USERMODULES_DIR)/numpy/fft/fft.c
 SRC_USERMOD += $(USERMODULES_DIR)/numpy/fft/fft_tools.c
 SRC_USERMOD += $(USERMODULES_DIR)/numpy/filter.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/io/io.c
 SRC_USERMOD += $(USERMODULES_DIR)/numpy/linalg/linalg.c
 SRC_USERMOD += $(USERMODULES_DIR)/numpy/linalg/linalg_tools.c
 SRC_USERMOD += $(USERMODULES_DIR)/numpy/numerical.c
diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
new file mode 100644
index 0000000..bc51c5b
--- /dev/null
+++ b/code/numpy/io/io.c
@@ -0,0 +1,163 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2022 Zoltán Vörös
+*/
+
+#include <string.h>
+
+#include "py/builtin.h"
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/stream.h"
+
+#include "../../ndarray.h"
+#include "io.h"
+
+#define ULAB_IO_BUFFER_SIZE         128
+
+
+static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
+    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(ndarray_);
+    int error;
+    char *buffer = m_new(char, ULAB_IO_BUFFER_SIZE);
+    uint8_t offset = 0;
+
+    // test for endianness
+    uint16_t x = 1;
+    int8_t endian = (x >> 8) == 1 ? '>' : '<';
+
+    mp_obj_t open_args[2] = {
+        fname,
+        MP_OBJ_NEW_QSTR(MP_QSTR_wb)
+    };
+
+    mp_obj_t npy = mp_builtin_open(2, open_args, (mp_map_t *)&mp_const_empty_map);
+    const mp_stream_p_t *fout = mp_get_stream(npy);
+
+    // write header;
+    // magic string + header length, which is always 128 - 10 = 118, represented as a little endian uint16 (0x76, 0x00)
+    // + beginning of the dictionary describing the array
+    memcpy(buffer, "\x93NUMPY\x01\x00\x76\x00{'descr': '", 21);
+    offset += 21;
+
+    buffer[offset] = endian;
+    if((ndarray->dtype == NDARRAY_UINT8) || (ndarray->dtype == NDARRAY_INT8)) {
+        // for single-byte data, the endianness doesn't matter
+        buffer[offset] = '|';
+    }
+    offset++;
+    switch(ndarray->dtype) {
+        case NDARRAY_UINT8:
+            memcpy(buffer+offset, "u1", 2);
+            break;
+        case NDARRAY_INT8:
+            memcpy(buffer+offset, "i1", 2);
+            break;
+        case NDARRAY_UINT16:
+            memcpy(buffer+offset, "u2", 2);
+            break;
+        case NDARRAY_INT16:
+            memcpy(buffer+offset, "i2", 2);
+            break;
+        case NDARRAY_FLOAT:
+            #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+            memcpy(buffer+offset, "f4", 2);
+            #else
+            memcpy(buffer+offset, "f8", 2);
+            #endif
+            break;
+        #if ULAB_SUPPORTS_COMPLEX
+        case NDARRAY_COMPLEX:
+            #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+            memcpy(buffer+offset, "c8", 2);
+            #else
+            memcpy(buffer+offset, "c16", 2);
+            #endif
+            offset++;
+            break;
+        #endif
+    }
+
+    offset += 2;
+    memcpy(buffer+offset, "', 'fortran_order': False, 'shape': (", 37);
+    offset += 37;
+
+    if(ndarray->ndim == 1) {
+        offset += sprintf(buffer+offset, "%ld,", ndarray->shape[ULAB_MAX_DIMS - 1]);
+    } else {
+        for(uint8_t i = 0; i < ndarray->ndim - 1; i++) {
+            offset += sprintf(buffer+offset, "%ld, ", ndarray->shape[ULAB_MAX_DIMS - i - 1]);
+        }
+        offset += sprintf(buffer+offset, "%ld", ndarray->shape[ULAB_MAX_DIMS - 1]);
+    }
+    memcpy(buffer+offset, "), }", 4);
+    offset += 4;
+    // pad with space till the very end
+    memset(buffer+offset, 32, ULAB_IO_BUFFER_SIZE - offset - 1);
+    buffer[ULAB_IO_BUFFER_SIZE - 1] = '\n';
+    fout->write(npy, buffer, ULAB_IO_BUFFER_SIZE, &error);
+
+    // write the array data
+    uint8_t sz = ndarray->itemsize;
+    offset = 0;
+
+    uint8_t *array = (uint8_t *)ndarray->array;
+
+    // TODO: if flatiter is available, we can save the loop expansion
+    #if ULAB_MAX_DIMS > 3
+    size_t i = 0;
+    do {
+    #endif
+        #if ULAB_MAX_DIMS > 2
+        size_t j = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 1
+            size_t k = 0;
+            do {
+            #endif
+                size_t l = 0;
+                do {
+                    memcpy(buffer+offset, array, sz);
+                    offset += sz;
+                    if(offset == ULAB_IO_BUFFER_SIZE) {
+                        fout->write(npy, buffer, offset, &error);
+                        offset = 0;
+                    }
+                    array += ndarray->strides[ULAB_MAX_DIMS - 1];
+                    l++;
+                } while(l <  ndarray->shape[ULAB_MAX_DIMS - 1]);
+            #if ULAB_MAX_DIMS > 1
+                array -= ndarray->strides[ULAB_MAX_DIMS - 1] * ndarray->shape[ULAB_MAX_DIMS-1];
+                array += ndarray->strides[ULAB_MAX_DIMS - 2];
+                k++;
+            } while(k <  ndarray->shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 2
+            array -= ndarray->strides[ULAB_MAX_DIMS - 2] * ndarray->shape[ULAB_MAX_DIMS-2];
+            array += ndarray->strides[ULAB_MAX_DIMS - 3];
+            j++;
+        } while(j <  ndarray->shape[ULAB_MAX_DIMS - 3]);
+        #endif
+    #if ULAB_MAX_DIMS > 3
+        array -= ndarray->strides[ULAB_MAX_DIMS - 3] * ndarray->shape[ULAB_MAX_DIMS-3];
+        array += ndarray->strides[ULAB_MAX_DIMS - 4];
+        i++;
+    } while(i <  ndarray->shape[ULAB_MAX_DIMS - 4]);
+    #endif
+
+    if(offset != 0) {
+        fout->write(npy, buffer, offset, &error);
+    }
+
+    m_del(char, buffer, ULAB_IO_BUFFER_SIZE);
+    return mp_const_none;
+}
+
+MP_DEFINE_CONST_FUN_OBJ_2(io_save_obj, io_save);
+
diff --git a/code/numpy/io/io.h b/code/numpy/io/io.h
new file mode 100644
index 0000000..7dcf2b5
--- /dev/null
+++ b/code/numpy/io/io.h
@@ -0,0 +1,16 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2022 Zoltán Vörös
+*/
+
+#ifndef _ULAB_IO_
+#define _ULAB_IO_
+
+MP_DECLARE_CONST_FUN_OBJ_2(io_save_obj);
+
+#endif
\ No newline at end of file
diff --git a/code/numpy/numpy.c b/code/numpy/numpy.c
index d674db6..59a7b0a 100644
--- a/code/numpy/numpy.c
+++ b/code/numpy/numpy.c
@@ -23,6 +23,7 @@
 #include "create.h"
 #include "fft/fft.h"
 #include "filter.h"
+#include "io/io.h"
 #include "linalg/linalg.h"
 #include "numerical.h"
 #include "stats.h"
@@ -265,6 +266,9 @@ static const mp_rom_map_elem_t ulab_numpy_globals_table[] = {
     #if ULAB_NUMPY_HAS_ROLL
         { MP_OBJ_NEW_QSTR(MP_QSTR_roll), (mp_obj_t)&numerical_roll_obj },
     #endif
+    #if ULAB_NUMPY_HAS_SAVE
+        { MP_OBJ_NEW_QSTR(MP_QSTR_save), (mp_obj_t)&io_save_obj },
+    #endif
     #if ULAB_NUMPY_HAS_SORT
         { MP_OBJ_NEW_QSTR(MP_QSTR_sort), (mp_obj_t)&numerical_sort_obj },
     #endif
diff --git a/code/ulab.c b/code/ulab.c
index dd2c994..8515868 100644
--- a/code/ulab.c
+++ b/code/ulab.c
@@ -33,7 +33,7 @@
 #include "user/user.h"
 #include "utils/utils.h"
 
-#define ULAB_VERSION 4.0.0
+#define ULAB_VERSION 4.1.0
 #define xstr(s) str(s)
 #define str(s) #s
 
diff --git a/code/ulab.h b/code/ulab.h
index 924f4c7..dec42ee 100644
--- a/code/ulab.h
+++ b/code/ulab.h
@@ -478,6 +478,10 @@
 #define ULAB_NUMPY_HAS_ROLL             (1)
 #endif
 
+#ifndef ULAB_NUMPY_HAS_SAVE
+#define ULAB_NUMPY_HAS_SAVE             (1)
+#endif
+
 #ifndef ULAB_NUMPY_HAS_SORT
 #define ULAB_NUMPY_HAS_SORT             (1)
 #endif
diff --git a/docs/ulab-change-log.md b/docs/ulab-change-log.md
index be9dc5e..ac81fb3 100644
--- a/docs/ulab-change-log.md
+++ b/docs/ulab-change-log.md
@@ -1,3 +1,9 @@
+Sat, 8 Jan 2022
+
+    version 4.1.0
+
+    implement numpy.save
+
 Fri, 3 Dec 2021
 
 version 3.3.8

From 68f809801682189f217600b07111d4a0363732fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Sat, 8 Jan 2022 20:31:00 +0100
Subject: [PATCH 02/20] include stdio.h for mac

---
 code/numpy/io/io.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index bc51c5b..eb616e0 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -9,6 +9,7 @@
 */
 
 #include <string.h>
+#include <stdio.h>
 
 #include "py/builtin.h"
 #include "py/obj.h"

From 78ba99f0090ab0f82b118719f953cf9b60d97b78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Sat, 8 Jan 2022 20:48:30 +0100
Subject: [PATCH 03/20] add input type checking

---
 code/numpy/io/io.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index eb616e0..3f804d4 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -23,6 +23,10 @@
 
 
 static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
+    if(!mp_obj_is_str(fname) || !mp_obj_is_type(ndarray_, &ulab_ndarray_type)) {
+        mp_raise_TypeError(translate("wrong input type"));
+    }
+
     ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(ndarray_);
     int error;
     char *buffer = m_new(char, ULAB_IO_BUFFER_SIZE);

From 3fd60dedf96740db2e4392dbd48b016b415e9940 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Sun, 9 Jan 2022 20:12:09 +0100
Subject: [PATCH 04/20] fix loop index in numpy.save

---
 code/numpy/io/io.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index 3f804d4..4e8639b 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -95,8 +95,8 @@ static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
     if(ndarray->ndim == 1) {
         offset += sprintf(buffer+offset, "%ld,", ndarray->shape[ULAB_MAX_DIMS - 1]);
     } else {
-        for(uint8_t i = 0; i < ndarray->ndim - 1; i++) {
-            offset += sprintf(buffer+offset, "%ld, ", ndarray->shape[ULAB_MAX_DIMS - i - 1]);
+        for(uint8_t i = ndarray->ndim; i > 1; i--) {
+            offset += sprintf(buffer+offset, "%ld, ", ndarray->shape[ULAB_MAX_DIMS - i]);
         }
         offset += sprintf(buffer+offset, "%ld", ndarray->shape[ULAB_MAX_DIMS - 1]);
     }
@@ -113,7 +113,6 @@ static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
 
     uint8_t *array = (uint8_t *)ndarray->array;
 
-    // TODO: if flatiter is available, we can save the loop expansion
     #if ULAB_MAX_DIMS > 3
     size_t i = 0;
     do {
@@ -156,9 +155,7 @@ static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
     } while(i <  ndarray->shape[ULAB_MAX_DIMS - 4]);
     #endif
 
-    if(offset != 0) {
-        fout->write(npy, buffer, offset, &error);
-    }
+    fout->write(npy, buffer, offset, &error);
 
     m_del(char, buffer, ULAB_IO_BUFFER_SIZE);
     return mp_const_none;

From 9d84125232572e1b97dcecc9b73f5aed27a7046e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Tue, 11 Jan 2022 19:18:48 +0100
Subject: [PATCH 05/20] close file in save implementation

---
 code/numpy/io/io.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index 4e8639b..16fcd96 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -156,6 +156,7 @@ static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
     #endif
 
     fout->write(npy, buffer, offset, &error);
+    fout->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
 
     m_del(char, buffer, ULAB_IO_BUFFER_SIZE);
     return mp_const_none;

From 8e42afe72ee756b7aecff918a28a973cf88ff24b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Wed, 12 Jan 2022 18:10:34 +0100
Subject: [PATCH 06/20] attempt to fix esp32 compilation error

---
 code/numpy/io/io.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index 16fcd96..f604c57 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -93,12 +93,12 @@ static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
     offset += 37;
 
     if(ndarray->ndim == 1) {
-        offset += sprintf(buffer+offset, "%ld,", ndarray->shape[ULAB_MAX_DIMS - 1]);
+        offset += sprintf(buffer+offset, "%zu,", ndarray->shape[ULAB_MAX_DIMS - 1]);
     } else {
         for(uint8_t i = ndarray->ndim; i > 1; i--) {
-            offset += sprintf(buffer+offset, "%ld, ", ndarray->shape[ULAB_MAX_DIMS - i]);
+            offset += sprintf(buffer+offset, "%zu, ", ndarray->shape[ULAB_MAX_DIMS - i]);
         }
-        offset += sprintf(buffer+offset, "%ld", ndarray->shape[ULAB_MAX_DIMS - 1]);
+        offset += sprintf(buffer+offset, "%zu", ndarray->shape[ULAB_MAX_DIMS - 1]);
     }
     memcpy(buffer+offset, "), }", 4);
     offset += 4;

From 17afe29124051a5c6dfeb1a19ba685dc304e3a61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Wed, 12 Jan 2022 20:17:24 +0100
Subject: [PATCH 07/20] implement load

---
 code/numpy/io/io.c                     | 218 ++++++++++++++++++++++++-
 code/numpy/io/io.h                     |   1 +
 code/numpy/numpy.c                     |   3 +
 code/ulab.c                            |   2 +-
 code/ulab.h                            |   4 +
 docs/manual/source/conf.py             |   2 +-
 docs/manual/source/numpy-functions.rst |  99 +++++++++--
 docs/numpy-functions.ipynb             |  95 ++++++++++-
 docs/ulab-change-log.md                |   6 +-
 docs/ulab-convert.ipynb                |  16 +-
 tests/2d/numpy/load_save.py            |  14 ++
 tests/2d/numpy/load_save.py.exp        |  30 ++++
 12 files changed, 449 insertions(+), 41 deletions(-)
 create mode 100644 tests/2d/numpy/load_save.py
 create mode 100644 tests/2d/numpy/load_save.py.exp

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index 16fcd96..5ea0a30 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -21,9 +21,215 @@
 
 #define ULAB_IO_BUFFER_SIZE         128
 
+#define ULAB_IO_LITTLE_ENDIAN       0
+#define ULAB_IO_BIG_ENDIAN          1
 
-static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
-    if(!mp_obj_is_str(fname) || !mp_obj_is_type(ndarray_, &ulab_ndarray_type)) {
+#if ULAB_NUMPY_HAS_LOAD
+static void io_read_(mp_obj_t npy, const mp_stream_p_t *fin, char *buffer, char *string, uint16_t len, int *error) {
+    size_t read = fin->read(npy, buffer, len, error);
+    bool fail = false;
+    if(read == len) {
+        if(string != NULL) {
+            if(memcmp(buffer, string, len) != 0) {
+                fail = true;
+            }
+        }
+    } else {
+        fail = true;
+    }
+    if(fail) {
+        fin->ioctl(npy, MP_STREAM_CLOSE, 0, error);
+        mp_raise_ValueError(translate("corrupted file"));
+    }
+}
+
+static mp_obj_t io_load(mp_obj_t file) {
+    if(!mp_obj_is_str(file)) {
+        mp_raise_TypeError(translate("wrong input type"));
+    }
+
+    int error;
+    char *buffer = m_new(char, ULAB_IO_BUFFER_SIZE);
+
+    // test for endianness
+    uint16_t x = 1;
+    int8_t native_endianness = (x >> 8) == 1 ? ULAB_IO_BIG_ENDIAN : ULAB_IO_LITTLE_ENDIAN;
+
+    mp_obj_t open_args[2] = {
+        file,
+        MP_OBJ_NEW_QSTR(MP_QSTR_rb)
+    };
+
+    mp_obj_t npy = mp_builtin_open(2, open_args, (mp_map_t *)&mp_const_empty_map);
+    const mp_stream_p_t *fin = mp_get_stream(npy);
+
+    // read header
+    // magic string
+    io_read_(npy, fin, buffer, "\x93NUMPY", 6, &error);
+    // simply discard the version number
+    io_read_(npy, fin, buffer, NULL, 2, &error);
+    // header length, represented as a little endian uint16 (0x76, 0x00)
+    io_read_(npy, fin, buffer, NULL, 2, &error);
+
+    uint16_t header_length = buffer[1];
+    header_length <<= 8;
+    header_length += buffer[0];
+
+    // beginning of the dictionary describing the array
+    io_read_(npy, fin, buffer, "{'descr': '", 11, &error);
+    uint8_t dtype;
+
+    io_read_(npy, fin, buffer, NULL, 1, &error);
+    uint8_t endianness;
+    if(*buffer == '<') {
+        endianness = ULAB_IO_LITTLE_ENDIAN;
+    } else if(*buffer == '>') {
+        endianness = ULAB_IO_BIG_ENDIAN;
+    }
+
+    io_read_(npy, fin, buffer, NULL, 2, &error);
+    if(memcmp(buffer, "u1", 2) == 0) {
+        dtype = NDARRAY_UINT8;
+    } else if(memcmp(buffer, "i1", 2) == 0) {
+        dtype = NDARRAY_INT8;
+    } else if(memcmp(buffer, "u2", 2) == 0) {
+        dtype = NDARRAY_UINT16;
+    } else if(memcmp(buffer, "i2", 2) == 0) {
+        dtype = NDARRAY_INT16;
+    }
+    #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+    else if(memcmp(buffer, "f4", 2) == 0) {
+        dtype = NDARRAY_FLOAT;
+    }
+    #else
+    else if(memcmp(buffer, "f8", 2) == 0) {
+        dtype = NDARRAY_FLOAT;
+    }
+    #endif
+    #if ULAB_SUPPORTS_COMPLEX
+    #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+    else if(memcmp(buffer, "c4", 2) == 0) {
+        dtype = NDARRAY_COMPLEX;
+    }
+    #else
+    else if(memcmp(buffer, "c8", 2) == 0) {
+        dtype = NDARRAY_COMPLEX;
+    }
+    #endif
+    #endif /* ULAB_SUPPORT_COPMLEX */
+    else {
+        mp_raise_TypeError(translate("wrong dtype"));
+    }
+
+    io_read_(npy, fin, buffer, "', 'fortran_order': False, 'shape': (", 37, &error);
+
+    size_t *shape = m_new(size_t, ULAB_MAX_DIMS);
+    memset(shape, 0, sizeof(size_t) * ULAB_MAX_DIMS);
+
+    uint16_t bytes_to_read = MIN(ULAB_IO_BUFFER_SIZE, header_length - 51);
+    // bytes_to_read is 128 at most. This should be enough to contain a
+    // maximum of 4 size_t numbers plus the delimiters
+    io_read_(npy, fin, buffer, NULL, bytes_to_read, &error);
+    char *needle = buffer;
+    uint8_t ndim = 0;
+
+    // find out the number of dimensions by counting the commas in the string
+    while(1) {
+        if(*needle == ',') {
+            ndim++;
+            if(needle[1] == ')') {
+                break;
+            }
+        } else if((*needle == ')') && (ndim > 0)) {
+            ndim++;
+            break;
+        }
+        needle++;
+    }
+
+    needle = buffer;
+    for(uint8_t i = 0; i < ndim; i++) {
+        size_t number = 0;
+        // trivial number parsing here
+        while(1) {
+            if((*needle == ' ') || (*needle == '\t')) {
+                needle++;
+            }
+            if((*needle > 47) && (*needle < 58)) {
+                number = number * 10 + (*needle - 48);
+            } else if((*needle == ',') || (*needle == ')')) {
+                break;
+            }
+            else {
+                mp_raise_ValueError(translate("corrupted file"));
+            }
+            needle++;
+        }
+        needle++;
+        shape[ULAB_MAX_DIMS - ndim + i] = number;
+    }
+
+    // strip the rest of the header
+    if((bytes_to_read + 51) < header_length) {
+
+        io_read_(npy, fin, buffer, NULL, 1, &error);
+    }
+
+    ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(ndim, shape, dtype);
+    char *array = (char *)ndarray->array;
+
+    size_t read = fin->read(npy, array, ndarray->len * ndarray->itemsize, &error);
+    if(read != ndarray->len * ndarray->itemsize) {
+        mp_raise_ValueError(translate("corrupted file"));
+    }
+
+    fin->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
+    m_del(char, buffer, ULAB_IO_BUFFER_SIZE);
+
+    // swap the bytes, if necessary
+    if((native_endianness != endianness) && (dtype != NDARRAY_UINT8) && (dtype != NDARRAY_INT8)) {
+        uint8_t sz = ndarray->itemsize;
+        char *tmpbuff = NULL;
+
+        #if ULAB_SUPPORTS_COMPLEX
+        if(dtype == NDARRAY_COMPLEX) {
+            // work with the floating point real and imaginary parts
+            sz /= 2;
+            tmpbuff = m_new(char, sz);
+            for(size_t i = 0; i < ndarray->len; i++) {
+                for(uint8_t k = 0; k < 2; k++) {
+                    tmpbuff += sz;
+                    for(uint8_t j = 0; j < sz; j++) {
+                        memcpy(--tmpbuff, array++, 1);
+                    }
+                    memcpy(array-sz, tmpbuff, sz);
+                }
+            }
+        } else {
+        #endif
+            tmpbuff = m_new(char, sz);
+            for(size_t i = 0; i < ndarray->len; i++) {
+                tmpbuff += sz;
+                for(uint8_t j = 0; j < sz; j++) {
+                    memcpy(--tmpbuff, array++, 1);
+                }
+                memcpy(array-sz, tmpbuff, sz);
+            }
+        #if ULAB_SUPPORTS_COMPLEX
+        }
+        #endif
+        m_del(char, tmpbuff, sz);
+    }
+
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(io_load_obj, io_load);
+#endif /* ULAB_NUMPY_HAS_LOAD */
+
+#if ULAB_NUMPY_HAS_SAVE
+static mp_obj_t io_save(mp_obj_t file, mp_obj_t ndarray_) {
+    if(!mp_obj_is_str(file) || !mp_obj_is_type(ndarray_, &ulab_ndarray_type)) {
         mp_raise_TypeError(translate("wrong input type"));
     }
 
@@ -34,10 +240,10 @@ static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
 
     // test for endianness
     uint16_t x = 1;
-    int8_t endian = (x >> 8) == 1 ? '>' : '<';
+    int8_t native_endiannes = (x >> 8) == 1 ? '>' : '<';
 
     mp_obj_t open_args[2] = {
-        fname,
+        file,
         MP_OBJ_NEW_QSTR(MP_QSTR_wb)
     };
 
@@ -50,7 +256,7 @@ static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
     memcpy(buffer, "\x93NUMPY\x01\x00\x76\x00{'descr': '", 21);
     offset += 21;
 
-    buffer[offset] = endian;
+    buffer[offset] = native_endiannes;
     if((ndarray->dtype == NDARRAY_UINT8) || (ndarray->dtype == NDARRAY_INT8)) {
         // for single-byte data, the endianness doesn't matter
         buffer[offset] = '|';
@@ -163,4 +369,4 @@ static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
 }
 
 MP_DEFINE_CONST_FUN_OBJ_2(io_save_obj, io_save);
-
+#endif /* ULAB_NUMPY_HAS_SAVE */
diff --git a/code/numpy/io/io.h b/code/numpy/io/io.h
index 7dcf2b5..a9dcdfc 100644
--- a/code/numpy/io/io.h
+++ b/code/numpy/io/io.h
@@ -12,5 +12,6 @@
 #define _ULAB_IO_
 
 MP_DECLARE_CONST_FUN_OBJ_2(io_save_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(io_load_obj);
 
 #endif
\ No newline at end of file
diff --git a/code/numpy/numpy.c b/code/numpy/numpy.c
index 59a7b0a..57e3f00 100644
--- a/code/numpy/numpy.c
+++ b/code/numpy/numpy.c
@@ -251,6 +251,9 @@ static const mp_rom_map_elem_t ulab_numpy_globals_table[] = {
     #if ULAB_NUMPY_HAS_FLIP
         { MP_OBJ_NEW_QSTR(MP_QSTR_flip), (mp_obj_t)&numerical_flip_obj },
     #endif
+    #if ULAB_NUMPY_HAS_LOAD
+        { MP_OBJ_NEW_QSTR(MP_QSTR_load), (mp_obj_t)&io_load_obj },
+    #endif
     #if ULAB_NUMPY_HAS_MINMAX
         { MP_OBJ_NEW_QSTR(MP_QSTR_max), (mp_obj_t)&numerical_max_obj },
     #endif
diff --git a/code/ulab.c b/code/ulab.c
index 8515868..a65f54a 100644
--- a/code/ulab.c
+++ b/code/ulab.c
@@ -33,7 +33,7 @@
 #include "user/user.h"
 #include "utils/utils.h"
 
-#define ULAB_VERSION 4.1.0
+#define ULAB_VERSION 4.2.0
 #define xstr(s) str(s)
 #define str(s) #s
 
diff --git a/code/ulab.h b/code/ulab.h
index dec42ee..65d0ce0 100644
--- a/code/ulab.h
+++ b/code/ulab.h
@@ -454,6 +454,10 @@
 #define ULAB_NUMPY_HAS_INTERP           (1)
 #endif
 
+#ifndef ULAB_NUMPY_HAS_LOAD
+#define ULAB_NUMPY_HAS_LOAD             (1)
+#endif
+
 #ifndef ULAB_NUMPY_HAS_MEAN
 #define ULAB_NUMPY_HAS_MEAN             (1)
 #endif
diff --git a/docs/manual/source/conf.py b/docs/manual/source/conf.py
index 5c7b7dc..1275760 100644
--- a/docs/manual/source/conf.py
+++ b/docs/manual/source/conf.py
@@ -27,7 +27,7 @@ copyright = '2019-2022, Zoltán Vörös and contributors'
 author = 'Zoltán Vörös'
 
 # The full version, including alpha/beta/rc tags
-release = '4.0.0'
+release = '4.2.0'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/docs/manual/source/numpy-functions.rst b/docs/manual/source/numpy-functions.rst
index 206d641..9962ebd 100644
--- a/docs/manual/source/numpy-functions.rst
+++ b/docs/manual/source/numpy-functions.rst
@@ -23,24 +23,26 @@ the firmware was compiled with complex support.
 15. `numpy.interp <#interp>`__
 16. `numpy.isfinite <#isfinite>`__
 17. `numpy.isinf <#isinf>`__
-18. `numpy.max <#max>`__
-19. `numpy.maximum <#maximum>`__
-20. `numpy.mean <#mean>`__
-21. `numpy.median <#median>`__
-22. `numpy.min <#min>`__
-23. `numpy.minimum <#minimum>`__
-24. `numpy.not_equal <#equal>`__
-25. `numpy.polyfit <#polyfit>`__
-26. `numpy.polyval <#polyval>`__
-27. `numpy.real\* <#real>`__
-28. `numpy.roll <#roll>`__
-29. `numpy.sort <#sort>`__
-30. `numpy.sort_complex\* <#sort_complex>`__
-31. `numpy.std <#std>`__
-32. `numpy.sum <#sum>`__
-33. `numpy.trace <#trace>`__
-34. `numpy.trapz <#trapz>`__
-35. `numpy.where <#where>`__
+18. `numpy.load <#load>`__
+19. `numpy.max <#max>`__
+20. `numpy.maximum <#maximum>`__
+21. `numpy.mean <#mean>`__
+22. `numpy.median <#median>`__
+23. `numpy.min <#min>`__
+24. `numpy.minimum <#minimum>`__
+25. `numpy.not_equal <#equal>`__
+26. `numpy.polyfit <#polyfit>`__
+27. `numpy.polyval <#polyval>`__
+28. `numpy.real\* <#real>`__
+29. `numpy.roll <#roll>`__
+30. `numpy.save <#save>`__
+31. `numpy.sort <#sort>`__
+32. `numpy.sort_complex\* <#sort_complex>`__
+33. `numpy.std <#std>`__
+34. `numpy.sum <#sum>`__
+35. `numpy.trace <#trace>`__
+36. `numpy.trapz <#trapz>`__
+37. `numpy.where <#where>`__
 
 all
 ---
@@ -871,6 +873,39 @@ positions, where the input is infinite. Integer types return the
     
 
 
+load
+----
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.load.html
+
+The function reads data from a file in ``numpy``\ ’s
+`platform-independent
+format <https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#module-numpy.lib.format>`__,
+and returns the generated array. If the endianness of the data in the
+file and the microcontroller differ, the bytes are automatically
+swapped.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.load('a.npy')
+    print(a)
+
+.. parsed-literal::
+
+    array([[0.0, 1.0, 2.0, 3.0, 4.0],
+           [5.0, 6.0, 7.0, 8.0, 9.0],
+           [10.0, 11.0, 12.0, 13.0, 14.0],
+           [15.0, 16.0, 17.0, 18.0, 19.0],
+           [20.0, 21.0, 22.0, 23.0, 24.0]], dtype=float64)
+    
+    
+
+
 mean
 ----
 
@@ -1319,6 +1354,34 @@ Vertical rolls require two internal copies of single columns.
     
 
 
+save
+----
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.save.html
+
+With the help of this function, numerical arrays can be saved in
+``numpy``\ ’s `platform-independent
+format <https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#module-numpy.lib.format>`__.
+
+The function takes two positional arguments, the name of the output
+file, and the array.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.array(range(25)).reshape((5, 5))
+    np.save('a.npy', a)
+
+.. parsed-literal::
+
+    
+    
+
+
 sort
 ----
 
diff --git a/docs/numpy-functions.ipynb b/docs/numpy-functions.ipynb
index f115a41..9e57507 100644
--- a/docs/numpy-functions.ipynb
+++ b/docs/numpy-functions.ipynb
@@ -34,8 +34,8 @@
    "execution_count": 1,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-01-07T19:45:28.079350Z",
-     "start_time": "2022-01-07T19:45:28.073911Z"
+     "end_time": "2022-01-12T19:06:59.366828Z",
+     "start_time": "2022-01-12T19:06:59.359952Z"
     }
    },
    "outputs": [],
@@ -52,8 +52,8 @@
    "execution_count": 2,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-01-07T19:45:28.654136Z",
-     "start_time": "2022-01-07T19:45:28.634610Z"
+     "end_time": "2022-01-12T19:07:00.590084Z",
+     "start_time": "2022-01-12T19:07:00.563790Z"
     }
    },
    "outputs": [],
@@ -249,6 +249,7 @@
     "1. [numpy.interp](#interp)\n",
     "1. [numpy.isfinite](#isfinite)\n",
     "1. [numpy.isinf](#isinf)\n",
+    "1. [numpy.load](#load)\n",
     "1. [numpy.max](#max)\n",
     "1. [numpy.maximum](#maximum)\n",
     "1. [numpy.mean](#mean)\n",
@@ -260,6 +261,7 @@
     "1. [numpy.polyval](#polyval)\n",
     "1. [numpy.real*](#real)\n",
     "1. [numpy.roll](#roll)\n",
+    "1. [numpy.save](#save)\n",
     "1. [numpy.sort](#sort)\n",
     "1. [numpy.sort_complex*](#sort_complex)\n",
     "1. [numpy.std](#std)\n",
@@ -1291,6 +1293,50 @@
     "print('\\nisinf(c):\\n', np.isinf(c))"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## load\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.load.html\n",
+    "\n",
+    "The function reads data from a file in `numpy`'s [platform-independent format](https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#module-numpy.lib.format), and returns the generated array. If the endianness of the data in the file and the microcontroller differ, the bytes are automatically swapped."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-12T19:11:10.361592Z",
+     "start_time": "2022-01-12T19:11:10.342439Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([[0.0, 1.0, 2.0, 3.0, 4.0],\n",
+      "       [5.0, 6.0, 7.0, 8.0, 9.0],\n",
+      "       [10.0, 11.0, 12.0, 13.0, 14.0],\n",
+      "       [15.0, 16.0, 17.0, 18.0, 19.0],\n",
+      "       [20.0, 21.0, 22.0, 23.0, 24.0]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.load('a.npy')\n",
+    "print(a)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -1872,6 +1918,47 @@
     "print(\"\\na rolled with None:\\n\", a)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## save\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.save.html\n",
+    "\n",
+    "With the help of this function, numerical arrays can be saved in `numpy`'s [platform-independent format](https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#module-numpy.lib.format).\n",
+    "\n",
+    "The function takes two positional arguments, the name of the output file, and the array. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-12T19:10:15.861415Z",
+     "start_time": "2022-01-12T19:10:15.852451Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.array(range(25)).reshape((5, 5))\n",
+    "np.save('a.npy', a)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
diff --git a/docs/ulab-change-log.md b/docs/ulab-change-log.md
index ac81fb3..c50c127 100644
--- a/docs/ulab-change-log.md
+++ b/docs/ulab-change-log.md
@@ -1,8 +1,8 @@
-Sat, 8 Jan 2022
+Wed, 12 Jan 2022
 
-    version 4.1.0
+    version 4.2.0
 
-    implement numpy.save
+    implement numpy.save, numpy.load
 
 Fri, 3 Dec 2021
 
diff --git a/docs/ulab-convert.ipynb b/docs/ulab-convert.ipynb
index 70c6fa4..a248d28 100644
--- a/docs/ulab-convert.ipynb
+++ b/docs/ulab-convert.ipynb
@@ -17,8 +17,8 @@
    "execution_count": 1,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-01-07T18:24:12.745063Z",
-     "start_time": "2022-01-07T18:24:12.733067Z"
+     "end_time": "2022-01-12T19:13:49.823884Z",
+     "start_time": "2022-01-12T19:13:49.814198Z"
     }
    },
    "outputs": [
@@ -61,7 +61,7 @@
     "author = 'Zoltán Vörös'\n",
     "\n",
     "# The full version, including alpha/beta/rc tags\n",
-    "release = '4.0.0'\n",
+    "release = '4.2.0'\n",
     "\n",
     "\n",
     "# -- General configuration ---------------------------------------------------\n",
@@ -218,8 +218,8 @@
    "execution_count": 2,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-01-07T18:24:27.671415Z",
-     "start_time": "2022-01-07T18:24:24.933205Z"
+     "end_time": "2022-01-12T19:13:56.492894Z",
+     "start_time": "2022-01-12T19:13:55.202514Z"
     }
    },
    "outputs": [],
@@ -256,11 +256,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-01-07T19:52:29.910335Z",
-     "start_time": "2022-01-07T19:52:28.432391Z"
+     "end_time": "2022-01-12T19:15:22.428830Z",
+     "start_time": "2022-01-12T19:15:17.704906Z"
     }
    },
    "outputs": [],
diff --git a/tests/2d/numpy/load_save.py b/tests/2d/numpy/load_save.py
new file mode 100644
index 0000000..6fb9d2a
--- /dev/null
+++ b/tests/2d/numpy/load_save.py
@@ -0,0 +1,14 @@
+try:
+    from ulab import numpy as np
+except:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+for dtype in dtypes:
+    a = np.array(range(25), dtype=dtype)
+    b = a.reshape((5, 5))
+    np.save('out.npy', a)
+    print(np.load('out.npy'))
+    np.save('out.npy', b)
+    print(np.load('out.npy'))
diff --git a/tests/2d/numpy/load_save.py.exp b/tests/2d/numpy/load_save.py.exp
new file mode 100644
index 0000000..71ca601
--- /dev/null
+++ b/tests/2d/numpy/load_save.py.exp
@@ -0,0 +1,30 @@
+array([0, 1, 2, ..., 22, 23, 24], dtype=uint8)
+array([[0, 1, 2, 3, 4],
+       [5, 6, 7, 8, 9],
+       [10, 11, 12, 13, 14],
+       [15, 16, 17, 18, 19],
+       [20, 21, 22, 23, 24]], dtype=uint8)
+array([0, 1, 2, ..., 22, 23, 24], dtype=int8)
+array([[0, 1, 2, 3, 4],
+       [5, 6, 7, 8, 9],
+       [10, 11, 12, 13, 14],
+       [15, 16, 17, 18, 19],
+       [20, 21, 22, 23, 24]], dtype=int8)
+array([0, 1, 2, ..., 22, 23, 24], dtype=uint16)
+array([[0, 1, 2, 3, 4],
+       [5, 6, 7, 8, 9],
+       [10, 11, 12, 13, 14],
+       [15, 16, 17, 18, 19],
+       [20, 21, 22, 23, 24]], dtype=uint16)
+array([0, 1, 2, ..., 22, 23, 24], dtype=int16)
+array([[0, 1, 2, 3, 4],
+       [5, 6, 7, 8, 9],
+       [10, 11, 12, 13, 14],
+       [15, 16, 17, 18, 19],
+       [20, 21, 22, 23, 24]], dtype=int16)
+array([0.0, 1.0, 2.0, ..., 22.0, 23.0, 24.0], dtype=float64)
+array([[0.0, 1.0, 2.0, 3.0, 4.0],
+       [5.0, 6.0, 7.0, 8.0, 9.0],
+       [10.0, 11.0, 12.0, 13.0, 14.0],
+       [15.0, 16.0, 17.0, 18.0, 19.0],
+       [20.0, 21.0, 22.0, 23.0, 24.0]], dtype=float64)

From eecd72de0415f63bbf8f0ea0cd90401b41099b68 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Wed, 12 Jan 2022 21:24:19 +0100
Subject: [PATCH 08/20] try to fix uninitialised variable issue

---
 code/numpy/io/io.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index f44bde2..0923812 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -21,8 +21,9 @@
 
 #define ULAB_IO_BUFFER_SIZE         128
 
-#define ULAB_IO_LITTLE_ENDIAN       0
-#define ULAB_IO_BIG_ENDIAN          1
+#define ULAB_IO_NULL_ENDIAN         0
+#define ULAB_IO_LITTLE_ENDIAN       1
+#define ULAB_IO_BIG_ENDIAN          2
 
 #if ULAB_NUMPY_HAS_LOAD
 static void io_read_(mp_obj_t npy, const mp_stream_p_t *fin, char *buffer, char *string, uint16_t len, int *error) {
@@ -80,7 +81,7 @@ static mp_obj_t io_load(mp_obj_t file) {
     uint8_t dtype;
 
     io_read_(npy, fin, buffer, NULL, 1, &error);
-    uint8_t endianness;
+    uint8_t endianness = ULAB_IO_NULL_ENDIAN;
     if(*buffer == '<') {
         endianness = ULAB_IO_LITTLE_ENDIAN;
     } else if(*buffer == '>') {
@@ -171,8 +172,7 @@ static mp_obj_t io_load(mp_obj_t file) {
 
     // strip the rest of the header
     if((bytes_to_read + 51) < header_length) {
-
-        io_read_(npy, fin, buffer, NULL, 1, &error);
+        io_read_(npy, fin, buffer, NULL, header_length - (bytes_to_read + 51), &error);
     }
 
     ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(ndim, shape, dtype);

From 66189452ac26700df828de3853b8e823af60d7e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Fri, 14 Jan 2022 14:01:39 +0100
Subject: [PATCH 09/20] always close file before exception, change exception
 type

---
 code/numpy/io/io.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index 0923812..527e35d 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -109,16 +109,17 @@ static mp_obj_t io_load(mp_obj_t file) {
     #endif
     #if ULAB_SUPPORTS_COMPLEX
     #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
-    else if(memcmp(buffer, "c4", 2) == 0) {
+    else if(memcmp(buffer, "c8", 2) == 0) {
         dtype = NDARRAY_COMPLEX;
     }
     #else
-    else if(memcmp(buffer, "c8", 2) == 0) {
+    else if(memcmp(buffer, "c16", 3) == 0) {
         dtype = NDARRAY_COMPLEX;
     }
     #endif
     #endif /* ULAB_SUPPORT_COPMLEX */
     else {
+        fin->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
         mp_raise_TypeError(translate("wrong dtype"));
     }
 
@@ -162,6 +163,7 @@ static mp_obj_t io_load(mp_obj_t file) {
                 break;
             }
             else {
+                fin->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
                 mp_raise_ValueError(translate("corrupted file"));
             }
             needle++;
@@ -180,6 +182,7 @@ static mp_obj_t io_load(mp_obj_t file) {
 
     size_t read = fin->read(npy, array, ndarray->len * ndarray->itemsize, &error);
     if(read != ndarray->len * ndarray->itemsize) {
+        fin->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
         mp_raise_ValueError(translate("corrupted file"));
     }
 
@@ -287,9 +290,9 @@ static mp_obj_t io_save(mp_obj_t file, mp_obj_t ndarray_) {
             #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
             memcpy(buffer+offset, "c8", 2);
             #else
-            memcpy(buffer+offset, "c16", 2);
-            #endif
+            memcpy(buffer+offset, "c16", 3);
             offset++;
+            #endif
             break;
         #endif
     }

From 7672ab9d44ba80168748d777ce83fd65af78cdec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Fri, 14 Jan 2022 14:10:04 +0100
Subject: [PATCH 10/20] change npy, fin, fout to stream, stream_p for
 consistency

---
 code/numpy/io/io.c | 56 +++++++++++++++++++++++-----------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index 527e35d..4beefa1 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -26,8 +26,8 @@
 #define ULAB_IO_BIG_ENDIAN          2
 
 #if ULAB_NUMPY_HAS_LOAD
-static void io_read_(mp_obj_t npy, const mp_stream_p_t *fin, char *buffer, char *string, uint16_t len, int *error) {
-    size_t read = fin->read(npy, buffer, len, error);
+static void io_read_(mp_obj_t stream, const mp_stream_p_t *stream_p, char *buffer, char *string, uint16_t len, int *error) {
+    size_t read = stream_p->read(stream, buffer, len, error);
     bool fail = false;
     if(read == len) {
         if(string != NULL) {
@@ -39,8 +39,8 @@ static void io_read_(mp_obj_t npy, const mp_stream_p_t *fin, char *buffer, char
         fail = true;
     }
     if(fail) {
-        fin->ioctl(npy, MP_STREAM_CLOSE, 0, error);
-        mp_raise_ValueError(translate("corrupted file"));
+        stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, error);
+        mp_raise_msg(&mp_type_RuntimeError, translate("corrupted file"));
     }
 }
 
@@ -61,26 +61,26 @@ static mp_obj_t io_load(mp_obj_t file) {
         MP_OBJ_NEW_QSTR(MP_QSTR_rb)
     };
 
-    mp_obj_t npy = mp_builtin_open(2, open_args, (mp_map_t *)&mp_const_empty_map);
-    const mp_stream_p_t *fin = mp_get_stream(npy);
+    mp_obj_t stream = mp_builtin_open(2, open_args, (mp_map_t *)&mp_const_empty_map);
+    const mp_stream_p_t *stream_p = mp_get_stream(stream);
 
     // read header
     // magic string
-    io_read_(npy, fin, buffer, "\x93NUMPY", 6, &error);
+    io_read_(stream, stream_p, buffer, "\x93NUMPY", 6, &error);
     // simply discard the version number
-    io_read_(npy, fin, buffer, NULL, 2, &error);
+    io_read_(stream, stream_p, buffer, NULL, 2, &error);
     // header length, represented as a little endian uint16 (0x76, 0x00)
-    io_read_(npy, fin, buffer, NULL, 2, &error);
+    io_read_(stream, stream_p, buffer, NULL, 2, &error);
 
     uint16_t header_length = buffer[1];
     header_length <<= 8;
     header_length += buffer[0];
 
     // beginning of the dictionary describing the array
-    io_read_(npy, fin, buffer, "{'descr': '", 11, &error);
+    io_read_(stream, stream_p, buffer, "{'descr': '", 11, &error);
     uint8_t dtype;
 
-    io_read_(npy, fin, buffer, NULL, 1, &error);
+    io_read_(stream, stream_p, buffer, NULL, 1, &error);
     uint8_t endianness = ULAB_IO_NULL_ENDIAN;
     if(*buffer == '<') {
         endianness = ULAB_IO_LITTLE_ENDIAN;
@@ -88,7 +88,7 @@ static mp_obj_t io_load(mp_obj_t file) {
         endianness = ULAB_IO_BIG_ENDIAN;
     }
 
-    io_read_(npy, fin, buffer, NULL, 2, &error);
+    io_read_(stream, stream_p, buffer, NULL, 2, &error);
     if(memcmp(buffer, "u1", 2) == 0) {
         dtype = NDARRAY_UINT8;
     } else if(memcmp(buffer, "i1", 2) == 0) {
@@ -119,11 +119,11 @@ static mp_obj_t io_load(mp_obj_t file) {
     #endif
     #endif /* ULAB_SUPPORT_COPMLEX */
     else {
-        fin->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
+        stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
         mp_raise_TypeError(translate("wrong dtype"));
     }
 
-    io_read_(npy, fin, buffer, "', 'fortran_order': False, 'shape': (", 37, &error);
+    io_read_(stream, stream_p, buffer, "', 'fortran_order': False, 'shape': (", 37, &error);
 
     size_t *shape = m_new(size_t, ULAB_MAX_DIMS);
     memset(shape, 0, sizeof(size_t) * ULAB_MAX_DIMS);
@@ -131,7 +131,7 @@ static mp_obj_t io_load(mp_obj_t file) {
     uint16_t bytes_to_read = MIN(ULAB_IO_BUFFER_SIZE, header_length - 51);
     // bytes_to_read is 128 at most. This should be enough to contain a
     // maximum of 4 size_t numbers plus the delimiters
-    io_read_(npy, fin, buffer, NULL, bytes_to_read, &error);
+    io_read_(stream, stream_p, buffer, NULL, bytes_to_read, &error);
     char *needle = buffer;
     uint8_t ndim = 0;
 
@@ -163,8 +163,8 @@ static mp_obj_t io_load(mp_obj_t file) {
                 break;
             }
             else {
-                fin->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
-                mp_raise_ValueError(translate("corrupted file"));
+                stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
+                mp_raise_msg(&mp_type_RuntimeError, translate("corrupted file"));
             }
             needle++;
         }
@@ -174,19 +174,19 @@ static mp_obj_t io_load(mp_obj_t file) {
 
     // strip the rest of the header
     if((bytes_to_read + 51) < header_length) {
-        io_read_(npy, fin, buffer, NULL, header_length - (bytes_to_read + 51), &error);
+        io_read_(stream, stream_p, buffer, NULL, header_length - (bytes_to_read + 51), &error);
     }
 
     ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(ndim, shape, dtype);
     char *array = (char *)ndarray->array;
 
-    size_t read = fin->read(npy, array, ndarray->len * ndarray->itemsize, &error);
+    size_t read = stream_p->read(stream, array, ndarray->len * ndarray->itemsize, &error);
     if(read != ndarray->len * ndarray->itemsize) {
-        fin->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
-        mp_raise_ValueError(translate("corrupted file"));
+        stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
+        mp_raise_msg(&mp_type_RuntimeError, translate("corrupted file"));
     }
 
-    fin->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
+    stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
     m_del(char, buffer, ULAB_IO_BUFFER_SIZE);
 
     // swap the bytes, if necessary
@@ -250,8 +250,8 @@ static mp_obj_t io_save(mp_obj_t file, mp_obj_t ndarray_) {
         MP_OBJ_NEW_QSTR(MP_QSTR_wb)
     };
 
-    mp_obj_t npy = mp_builtin_open(2, open_args, (mp_map_t *)&mp_const_empty_map);
-    const mp_stream_p_t *fout = mp_get_stream(npy);
+    mp_obj_t stream = mp_builtin_open(2, open_args, (mp_map_t *)&mp_const_empty_map);
+    const mp_stream_p_t *stream_p = mp_get_stream(stream);
 
     // write header;
     // magic string + header length, which is always 128 - 10 = 118, represented as a little endian uint16 (0x76, 0x00)
@@ -314,7 +314,7 @@ static mp_obj_t io_save(mp_obj_t file, mp_obj_t ndarray_) {
     // pad with space till the very end
     memset(buffer+offset, 32, ULAB_IO_BUFFER_SIZE - offset - 1);
     buffer[ULAB_IO_BUFFER_SIZE - 1] = '\n';
-    fout->write(npy, buffer, ULAB_IO_BUFFER_SIZE, &error);
+    stream_p->write(stream, buffer, ULAB_IO_BUFFER_SIZE, &error);
 
     // write the array data
     uint8_t sz = ndarray->itemsize;
@@ -339,7 +339,7 @@ static mp_obj_t io_save(mp_obj_t file, mp_obj_t ndarray_) {
                     memcpy(buffer+offset, array, sz);
                     offset += sz;
                     if(offset == ULAB_IO_BUFFER_SIZE) {
-                        fout->write(npy, buffer, offset, &error);
+                        stream_p->write(stream, buffer, offset, &error);
                         offset = 0;
                     }
                     array += ndarray->strides[ULAB_MAX_DIMS - 1];
@@ -364,8 +364,8 @@ static mp_obj_t io_save(mp_obj_t file, mp_obj_t ndarray_) {
     } while(i <  ndarray->shape[ULAB_MAX_DIMS - 4]);
     #endif
 
-    fout->write(npy, buffer, offset, &error);
-    fout->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
+    stream_p->write(stream, buffer, offset, &error);
+    stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
 
     m_del(char, buffer, ULAB_IO_BUFFER_SIZE);
     return mp_const_none;

From d8ce4b395f17f26140fa836d7ea2a598bef2380e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Sat, 8 Jan 2022 20:27:02 +0100
Subject: [PATCH 11/20] rebase from master

---
 code/micropython.mk     |   1 +
 code/numpy/io/io.c      | 163 ++++++++++++++++++++++++++++++++++++++++
 code/numpy/io/io.h      |  16 ++++
 code/numpy/numpy.c      |   4 +
 code/ulab.c             |   2 +-
 code/ulab.h             |   4 +
 docs/ulab-change-log.md |   7 ++
 7 files changed, 196 insertions(+), 1 deletion(-)
 create mode 100644 code/numpy/io/io.c
 create mode 100644 code/numpy/io/io.h

diff --git a/code/micropython.mk b/code/micropython.mk
index d16b177..f36d1d6 100644
--- a/code/micropython.mk
+++ b/code/micropython.mk
@@ -19,6 +19,7 @@ SRC_USERMOD += $(USERMODULES_DIR)/numpy/create.c
 SRC_USERMOD += $(USERMODULES_DIR)/numpy/fft/fft.c
 SRC_USERMOD += $(USERMODULES_DIR)/numpy/fft/fft_tools.c
 SRC_USERMOD += $(USERMODULES_DIR)/numpy/filter.c
+SRC_USERMOD += $(USERMODULES_DIR)/numpy/io/io.c
 SRC_USERMOD += $(USERMODULES_DIR)/numpy/linalg/linalg.c
 SRC_USERMOD += $(USERMODULES_DIR)/numpy/linalg/linalg_tools.c
 SRC_USERMOD += $(USERMODULES_DIR)/numpy/numerical.c
diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
new file mode 100644
index 0000000..bc51c5b
--- /dev/null
+++ b/code/numpy/io/io.c
@@ -0,0 +1,163 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2022 Zoltán Vörös
+*/
+
+#include <string.h>
+
+#include "py/builtin.h"
+#include "py/obj.h"
+#include "py/runtime.h"
+#include "py/stream.h"
+
+#include "../../ndarray.h"
+#include "io.h"
+
+#define ULAB_IO_BUFFER_SIZE         128
+
+
+static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
+    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(ndarray_);
+    int error;
+    char *buffer = m_new(char, ULAB_IO_BUFFER_SIZE);
+    uint8_t offset = 0;
+
+    // test for endianness
+    uint16_t x = 1;
+    int8_t endian = (x >> 8) == 1 ? '>' : '<';
+
+    mp_obj_t open_args[2] = {
+        fname,
+        MP_OBJ_NEW_QSTR(MP_QSTR_wb)
+    };
+
+    mp_obj_t npy = mp_builtin_open(2, open_args, (mp_map_t *)&mp_const_empty_map);
+    const mp_stream_p_t *fout = mp_get_stream(npy);
+
+    // write header;
+    // magic string + header length, which is always 128 - 10 = 118, represented as a little endian uint16 (0x76, 0x00)
+    // + beginning of the dictionary describing the array
+    memcpy(buffer, "\x93NUMPY\x01\x00\x76\x00{'descr': '", 21);
+    offset += 21;
+
+    buffer[offset] = endian;
+    if((ndarray->dtype == NDARRAY_UINT8) || (ndarray->dtype == NDARRAY_INT8)) {
+        // for single-byte data, the endianness doesn't matter
+        buffer[offset] = '|';
+    }
+    offset++;
+    switch(ndarray->dtype) {
+        case NDARRAY_UINT8:
+            memcpy(buffer+offset, "u1", 2);
+            break;
+        case NDARRAY_INT8:
+            memcpy(buffer+offset, "i1", 2);
+            break;
+        case NDARRAY_UINT16:
+            memcpy(buffer+offset, "u2", 2);
+            break;
+        case NDARRAY_INT16:
+            memcpy(buffer+offset, "i2", 2);
+            break;
+        case NDARRAY_FLOAT:
+            #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+            memcpy(buffer+offset, "f4", 2);
+            #else
+            memcpy(buffer+offset, "f8", 2);
+            #endif
+            break;
+        #if ULAB_SUPPORTS_COMPLEX
+        case NDARRAY_COMPLEX:
+            #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+            memcpy(buffer+offset, "c8", 2);
+            #else
+            memcpy(buffer+offset, "c16", 2);
+            #endif
+            offset++;
+            break;
+        #endif
+    }
+
+    offset += 2;
+    memcpy(buffer+offset, "', 'fortran_order': False, 'shape': (", 37);
+    offset += 37;
+
+    if(ndarray->ndim == 1) {
+        offset += sprintf(buffer+offset, "%ld,", ndarray->shape[ULAB_MAX_DIMS - 1]);
+    } else {
+        for(uint8_t i = 0; i < ndarray->ndim - 1; i++) {
+            offset += sprintf(buffer+offset, "%ld, ", ndarray->shape[ULAB_MAX_DIMS - i - 1]);
+        }
+        offset += sprintf(buffer+offset, "%ld", ndarray->shape[ULAB_MAX_DIMS - 1]);
+    }
+    memcpy(buffer+offset, "), }", 4);
+    offset += 4;
+    // pad with space till the very end
+    memset(buffer+offset, 32, ULAB_IO_BUFFER_SIZE - offset - 1);
+    buffer[ULAB_IO_BUFFER_SIZE - 1] = '\n';
+    fout->write(npy, buffer, ULAB_IO_BUFFER_SIZE, &error);
+
+    // write the array data
+    uint8_t sz = ndarray->itemsize;
+    offset = 0;
+
+    uint8_t *array = (uint8_t *)ndarray->array;
+
+    // TODO: if flatiter is available, we can save the loop expansion
+    #if ULAB_MAX_DIMS > 3
+    size_t i = 0;
+    do {
+    #endif
+        #if ULAB_MAX_DIMS > 2
+        size_t j = 0;
+        do {
+        #endif
+            #if ULAB_MAX_DIMS > 1
+            size_t k = 0;
+            do {
+            #endif
+                size_t l = 0;
+                do {
+                    memcpy(buffer+offset, array, sz);
+                    offset += sz;
+                    if(offset == ULAB_IO_BUFFER_SIZE) {
+                        fout->write(npy, buffer, offset, &error);
+                        offset = 0;
+                    }
+                    array += ndarray->strides[ULAB_MAX_DIMS - 1];
+                    l++;
+                } while(l <  ndarray->shape[ULAB_MAX_DIMS - 1]);
+            #if ULAB_MAX_DIMS > 1
+                array -= ndarray->strides[ULAB_MAX_DIMS - 1] * ndarray->shape[ULAB_MAX_DIMS-1];
+                array += ndarray->strides[ULAB_MAX_DIMS - 2];
+                k++;
+            } while(k <  ndarray->shape[ULAB_MAX_DIMS - 2]);
+            #endif
+        #if ULAB_MAX_DIMS > 2
+            array -= ndarray->strides[ULAB_MAX_DIMS - 2] * ndarray->shape[ULAB_MAX_DIMS-2];
+            array += ndarray->strides[ULAB_MAX_DIMS - 3];
+            j++;
+        } while(j <  ndarray->shape[ULAB_MAX_DIMS - 3]);
+        #endif
+    #if ULAB_MAX_DIMS > 3
+        array -= ndarray->strides[ULAB_MAX_DIMS - 3] * ndarray->shape[ULAB_MAX_DIMS-3];
+        array += ndarray->strides[ULAB_MAX_DIMS - 4];
+        i++;
+    } while(i <  ndarray->shape[ULAB_MAX_DIMS - 4]);
+    #endif
+
+    if(offset != 0) {
+        fout->write(npy, buffer, offset, &error);
+    }
+
+    m_del(char, buffer, ULAB_IO_BUFFER_SIZE);
+    return mp_const_none;
+}
+
+MP_DEFINE_CONST_FUN_OBJ_2(io_save_obj, io_save);
+
diff --git a/code/numpy/io/io.h b/code/numpy/io/io.h
new file mode 100644
index 0000000..7dcf2b5
--- /dev/null
+++ b/code/numpy/io/io.h
@@ -0,0 +1,16 @@
+/*
+ * This file is part of the micropython-ulab project,
+ *
+ * https://github.com/v923z/micropython-ulab
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2022 Zoltán Vörös
+*/
+
+#ifndef _ULAB_IO_
+#define _ULAB_IO_
+
+MP_DECLARE_CONST_FUN_OBJ_2(io_save_obj);
+
+#endif
\ No newline at end of file
diff --git a/code/numpy/numpy.c b/code/numpy/numpy.c
index 25600db..fe38c29 100644
--- a/code/numpy/numpy.c
+++ b/code/numpy/numpy.c
@@ -23,6 +23,7 @@
 #include "create.h"
 #include "fft/fft.h"
 #include "filter.h"
+#include "io/io.h"
 #include "linalg/linalg.h"
 #include "numerical.h"
 #include "stats.h"
@@ -271,6 +272,9 @@ static const mp_rom_map_elem_t ulab_numpy_globals_table[] = {
     #if ULAB_NUMPY_HAS_ROLL
         { MP_OBJ_NEW_QSTR(MP_QSTR_roll), (mp_obj_t)&numerical_roll_obj },
     #endif
+    #if ULAB_NUMPY_HAS_SAVE
+        { MP_OBJ_NEW_QSTR(MP_QSTR_save), (mp_obj_t)&io_save_obj },
+    #endif
     #if ULAB_NUMPY_HAS_SIZE
         { MP_OBJ_NEW_QSTR(MP_QSTR_size), (mp_obj_t)&transform_size_obj },
     #endif
diff --git a/code/ulab.c b/code/ulab.c
index a65f54a..8d4ae70 100644
--- a/code/ulab.c
+++ b/code/ulab.c
@@ -33,7 +33,7 @@
 #include "user/user.h"
 #include "utils/utils.h"
 
-#define ULAB_VERSION 4.2.0
+#define ULAB_VERSION 4.3.0
 #define xstr(s) str(s)
 #define str(s) #s
 
diff --git a/code/ulab.h b/code/ulab.h
index d5750bc..4446e52 100644
--- a/code/ulab.h
+++ b/code/ulab.h
@@ -486,6 +486,10 @@
 #define ULAB_NUMPY_HAS_ROLL             (1)
 #endif
 
+#ifndef ULAB_NUMPY_HAS_SAVE
+#define ULAB_NUMPY_HAS_SAVE             (1)
+#endif
+
 #ifndef ULAB_NUMPY_HAS_SIZE
 #define ULAB_NUMPY_HAS_SIZE             (1)
 #endif
diff --git a/docs/ulab-change-log.md b/docs/ulab-change-log.md
index 7d9a359..8235b41 100644
--- a/docs/ulab-change-log.md
+++ b/docs/ulab-change-log.md
@@ -1,3 +1,10 @@
+
+Sat, 8 Jan 2022
+
+    version 4.3.0
+
+    implement numpy.save, numpy.load
+
 Fri, 14 Jan 2022
 
 version 4.2.0

From 030463f7ff2a832203a926de693fdf59d2d4e6b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Sat, 8 Jan 2022 20:31:00 +0100
Subject: [PATCH 12/20] include stdio.h for mac

---
 code/numpy/io/io.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index bc51c5b..eb616e0 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -9,6 +9,7 @@
 */
 
 #include <string.h>
+#include <stdio.h>
 
 #include "py/builtin.h"
 #include "py/obj.h"

From 56b3d800e34801c5e758fc58940158110b6d486a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Sat, 8 Jan 2022 20:48:30 +0100
Subject: [PATCH 13/20] add input type checking

---
 code/numpy/io/io.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index eb616e0..3f804d4 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -23,6 +23,10 @@
 
 
 static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
+    if(!mp_obj_is_str(fname) || !mp_obj_is_type(ndarray_, &ulab_ndarray_type)) {
+        mp_raise_TypeError(translate("wrong input type"));
+    }
+
     ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(ndarray_);
     int error;
     char *buffer = m_new(char, ULAB_IO_BUFFER_SIZE);

From acb81aeded301a2aa7e40c5b1ca0c67d5ad9766e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Sun, 9 Jan 2022 20:12:09 +0100
Subject: [PATCH 14/20] fix loop index in numpy.save

---
 code/numpy/io/io.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index 3f804d4..4e8639b 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -95,8 +95,8 @@ static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
     if(ndarray->ndim == 1) {
         offset += sprintf(buffer+offset, "%ld,", ndarray->shape[ULAB_MAX_DIMS - 1]);
     } else {
-        for(uint8_t i = 0; i < ndarray->ndim - 1; i++) {
-            offset += sprintf(buffer+offset, "%ld, ", ndarray->shape[ULAB_MAX_DIMS - i - 1]);
+        for(uint8_t i = ndarray->ndim; i > 1; i--) {
+            offset += sprintf(buffer+offset, "%ld, ", ndarray->shape[ULAB_MAX_DIMS - i]);
         }
         offset += sprintf(buffer+offset, "%ld", ndarray->shape[ULAB_MAX_DIMS - 1]);
     }
@@ -113,7 +113,6 @@ static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
 
     uint8_t *array = (uint8_t *)ndarray->array;
 
-    // TODO: if flatiter is available, we can save the loop expansion
     #if ULAB_MAX_DIMS > 3
     size_t i = 0;
     do {
@@ -156,9 +155,7 @@ static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
     } while(i <  ndarray->shape[ULAB_MAX_DIMS - 4]);
     #endif
 
-    if(offset != 0) {
-        fout->write(npy, buffer, offset, &error);
-    }
+    fout->write(npy, buffer, offset, &error);
 
     m_del(char, buffer, ULAB_IO_BUFFER_SIZE);
     return mp_const_none;

From 4b0c3215fdeaf4331f70e9b8dfe71b4ff18d84f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Tue, 11 Jan 2022 19:18:48 +0100
Subject: [PATCH 15/20] close file in save implementation

---
 code/numpy/io/io.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index 4e8639b..16fcd96 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -156,6 +156,7 @@ static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
     #endif
 
     fout->write(npy, buffer, offset, &error);
+    fout->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
 
     m_del(char, buffer, ULAB_IO_BUFFER_SIZE);
     return mp_const_none;

From 834ddc821871c5b011965986093a1858bc1fba75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Wed, 12 Jan 2022 18:10:34 +0100
Subject: [PATCH 16/20] attempt to fix eps32 compilation error

---
 code/numpy/io/io.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index 16fcd96..f604c57 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -93,12 +93,12 @@ static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
     offset += 37;
 
     if(ndarray->ndim == 1) {
-        offset += sprintf(buffer+offset, "%ld,", ndarray->shape[ULAB_MAX_DIMS - 1]);
+        offset += sprintf(buffer+offset, "%zu,", ndarray->shape[ULAB_MAX_DIMS - 1]);
     } else {
         for(uint8_t i = ndarray->ndim; i > 1; i--) {
-            offset += sprintf(buffer+offset, "%ld, ", ndarray->shape[ULAB_MAX_DIMS - i]);
+            offset += sprintf(buffer+offset, "%zu, ", ndarray->shape[ULAB_MAX_DIMS - i]);
         }
-        offset += sprintf(buffer+offset, "%ld", ndarray->shape[ULAB_MAX_DIMS - 1]);
+        offset += sprintf(buffer+offset, "%zu", ndarray->shape[ULAB_MAX_DIMS - 1]);
     }
     memcpy(buffer+offset, "), }", 4);
     offset += 4;

From a1f1cd255b7fcf55793eb9a7350f7668592a7dba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Wed, 12 Jan 2022 20:17:24 +0100
Subject: [PATCH 17/20] rebasing from master

---
 code/numpy/io/io.c                     | 218 ++++++++++++++++++++++++-
 code/numpy/io/io.h                     |   1 +
 code/numpy/numpy.c                     |   3 +
 code/ulab.h                            |   4 +
 docs/manual/source/conf.py             |   2 +-
 docs/manual/source/numpy-functions.rst |  94 ++++++++---
 docs/numpy-functions.ipynb             |  88 +++++++++-
 docs/ulab-convert.ipynb                |  16 +-
 tests/2d/numpy/load_save.py            |  14 ++
 tests/2d/numpy/load_save.py.exp        |  30 ++++
 10 files changed, 428 insertions(+), 42 deletions(-)
 create mode 100644 tests/2d/numpy/load_save.py
 create mode 100644 tests/2d/numpy/load_save.py.exp

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index f604c57..f44bde2 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -21,9 +21,215 @@
 
 #define ULAB_IO_BUFFER_SIZE         128
 
+#define ULAB_IO_LITTLE_ENDIAN       0
+#define ULAB_IO_BIG_ENDIAN          1
 
-static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
-    if(!mp_obj_is_str(fname) || !mp_obj_is_type(ndarray_, &ulab_ndarray_type)) {
+#if ULAB_NUMPY_HAS_LOAD
+static void io_read_(mp_obj_t npy, const mp_stream_p_t *fin, char *buffer, char *string, uint16_t len, int *error) {
+    size_t read = fin->read(npy, buffer, len, error);
+    bool fail = false;
+    if(read == len) {
+        if(string != NULL) {
+            if(memcmp(buffer, string, len) != 0) {
+                fail = true;
+            }
+        }
+    } else {
+        fail = true;
+    }
+    if(fail) {
+        fin->ioctl(npy, MP_STREAM_CLOSE, 0, error);
+        mp_raise_ValueError(translate("corrupted file"));
+    }
+}
+
+static mp_obj_t io_load(mp_obj_t file) {
+    if(!mp_obj_is_str(file)) {
+        mp_raise_TypeError(translate("wrong input type"));
+    }
+
+    int error;
+    char *buffer = m_new(char, ULAB_IO_BUFFER_SIZE);
+
+    // test for endianness
+    uint16_t x = 1;
+    int8_t native_endianness = (x >> 8) == 1 ? ULAB_IO_BIG_ENDIAN : ULAB_IO_LITTLE_ENDIAN;
+
+    mp_obj_t open_args[2] = {
+        file,
+        MP_OBJ_NEW_QSTR(MP_QSTR_rb)
+    };
+
+    mp_obj_t npy = mp_builtin_open(2, open_args, (mp_map_t *)&mp_const_empty_map);
+    const mp_stream_p_t *fin = mp_get_stream(npy);
+
+    // read header
+    // magic string
+    io_read_(npy, fin, buffer, "\x93NUMPY", 6, &error);
+    // simply discard the version number
+    io_read_(npy, fin, buffer, NULL, 2, &error);
+    // header length, represented as a little endian uint16 (0x76, 0x00)
+    io_read_(npy, fin, buffer, NULL, 2, &error);
+
+    uint16_t header_length = buffer[1];
+    header_length <<= 8;
+    header_length += buffer[0];
+
+    // beginning of the dictionary describing the array
+    io_read_(npy, fin, buffer, "{'descr': '", 11, &error);
+    uint8_t dtype;
+
+    io_read_(npy, fin, buffer, NULL, 1, &error);
+    uint8_t endianness;
+    if(*buffer == '<') {
+        endianness = ULAB_IO_LITTLE_ENDIAN;
+    } else if(*buffer == '>') {
+        endianness = ULAB_IO_BIG_ENDIAN;
+    }
+
+    io_read_(npy, fin, buffer, NULL, 2, &error);
+    if(memcmp(buffer, "u1", 2) == 0) {
+        dtype = NDARRAY_UINT8;
+    } else if(memcmp(buffer, "i1", 2) == 0) {
+        dtype = NDARRAY_INT8;
+    } else if(memcmp(buffer, "u2", 2) == 0) {
+        dtype = NDARRAY_UINT16;
+    } else if(memcmp(buffer, "i2", 2) == 0) {
+        dtype = NDARRAY_INT16;
+    }
+    #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+    else if(memcmp(buffer, "f4", 2) == 0) {
+        dtype = NDARRAY_FLOAT;
+    }
+    #else
+    else if(memcmp(buffer, "f8", 2) == 0) {
+        dtype = NDARRAY_FLOAT;
+    }
+    #endif
+    #if ULAB_SUPPORTS_COMPLEX
+    #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
+    else if(memcmp(buffer, "c4", 2) == 0) {
+        dtype = NDARRAY_COMPLEX;
+    }
+    #else
+    else if(memcmp(buffer, "c8", 2) == 0) {
+        dtype = NDARRAY_COMPLEX;
+    }
+    #endif
+    #endif /* ULAB_SUPPORT_COPMLEX */
+    else {
+        mp_raise_TypeError(translate("wrong dtype"));
+    }
+
+    io_read_(npy, fin, buffer, "', 'fortran_order': False, 'shape': (", 37, &error);
+
+    size_t *shape = m_new(size_t, ULAB_MAX_DIMS);
+    memset(shape, 0, sizeof(size_t) * ULAB_MAX_DIMS);
+
+    uint16_t bytes_to_read = MIN(ULAB_IO_BUFFER_SIZE, header_length - 51);
+    // bytes_to_read is 128 at most. This should be enough to contain a
+    // maximum of 4 size_t numbers plus the delimiters
+    io_read_(npy, fin, buffer, NULL, bytes_to_read, &error);
+    char *needle = buffer;
+    uint8_t ndim = 0;
+
+    // find out the number of dimensions by counting the commas in the string
+    while(1) {
+        if(*needle == ',') {
+            ndim++;
+            if(needle[1] == ')') {
+                break;
+            }
+        } else if((*needle == ')') && (ndim > 0)) {
+            ndim++;
+            break;
+        }
+        needle++;
+    }
+
+    needle = buffer;
+    for(uint8_t i = 0; i < ndim; i++) {
+        size_t number = 0;
+        // trivial number parsing here
+        while(1) {
+            if((*needle == ' ') || (*needle == '\t')) {
+                needle++;
+            }
+            if((*needle > 47) && (*needle < 58)) {
+                number = number * 10 + (*needle - 48);
+            } else if((*needle == ',') || (*needle == ')')) {
+                break;
+            }
+            else {
+                mp_raise_ValueError(translate("corrupted file"));
+            }
+            needle++;
+        }
+        needle++;
+        shape[ULAB_MAX_DIMS - ndim + i] = number;
+    }
+
+    // strip the rest of the header
+    if((bytes_to_read + 51) < header_length) {
+
+        io_read_(npy, fin, buffer, NULL, 1, &error);
+    }
+
+    ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(ndim, shape, dtype);
+    char *array = (char *)ndarray->array;
+
+    size_t read = fin->read(npy, array, ndarray->len * ndarray->itemsize, &error);
+    if(read != ndarray->len * ndarray->itemsize) {
+        mp_raise_ValueError(translate("corrupted file"));
+    }
+
+    fin->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
+    m_del(char, buffer, ULAB_IO_BUFFER_SIZE);
+
+    // swap the bytes, if necessary
+    if((native_endianness != endianness) && (dtype != NDARRAY_UINT8) && (dtype != NDARRAY_INT8)) {
+        uint8_t sz = ndarray->itemsize;
+        char *tmpbuff = NULL;
+
+        #if ULAB_SUPPORTS_COMPLEX
+        if(dtype == NDARRAY_COMPLEX) {
+            // work with the floating point real and imaginary parts
+            sz /= 2;
+            tmpbuff = m_new(char, sz);
+            for(size_t i = 0; i < ndarray->len; i++) {
+                for(uint8_t k = 0; k < 2; k++) {
+                    tmpbuff += sz;
+                    for(uint8_t j = 0; j < sz; j++) {
+                        memcpy(--tmpbuff, array++, 1);
+                    }
+                    memcpy(array-sz, tmpbuff, sz);
+                }
+            }
+        } else {
+        #endif
+            tmpbuff = m_new(char, sz);
+            for(size_t i = 0; i < ndarray->len; i++) {
+                tmpbuff += sz;
+                for(uint8_t j = 0; j < sz; j++) {
+                    memcpy(--tmpbuff, array++, 1);
+                }
+                memcpy(array-sz, tmpbuff, sz);
+            }
+        #if ULAB_SUPPORTS_COMPLEX
+        }
+        #endif
+        m_del(char, tmpbuff, sz);
+    }
+
+    return MP_OBJ_FROM_PTR(ndarray);
+}
+
+MP_DEFINE_CONST_FUN_OBJ_1(io_load_obj, io_load);
+#endif /* ULAB_NUMPY_HAS_LOAD */
+
+#if ULAB_NUMPY_HAS_SAVE
+static mp_obj_t io_save(mp_obj_t file, mp_obj_t ndarray_) {
+    if(!mp_obj_is_str(file) || !mp_obj_is_type(ndarray_, &ulab_ndarray_type)) {
         mp_raise_TypeError(translate("wrong input type"));
     }
 
@@ -34,10 +240,10 @@ static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
 
     // test for endianness
     uint16_t x = 1;
-    int8_t endian = (x >> 8) == 1 ? '>' : '<';
+    int8_t native_endiannes = (x >> 8) == 1 ? '>' : '<';
 
     mp_obj_t open_args[2] = {
-        fname,
+        file,
         MP_OBJ_NEW_QSTR(MP_QSTR_wb)
     };
 
@@ -50,7 +256,7 @@ static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
     memcpy(buffer, "\x93NUMPY\x01\x00\x76\x00{'descr': '", 21);
     offset += 21;
 
-    buffer[offset] = endian;
+    buffer[offset] = native_endiannes;
     if((ndarray->dtype == NDARRAY_UINT8) || (ndarray->dtype == NDARRAY_INT8)) {
         // for single-byte data, the endianness doesn't matter
         buffer[offset] = '|';
@@ -163,4 +369,4 @@ static mp_obj_t io_save(mp_obj_t fname, mp_obj_t ndarray_) {
 }
 
 MP_DEFINE_CONST_FUN_OBJ_2(io_save_obj, io_save);
-
+#endif /* ULAB_NUMPY_HAS_SAVE */
diff --git a/code/numpy/io/io.h b/code/numpy/io/io.h
index 7dcf2b5..a9dcdfc 100644
--- a/code/numpy/io/io.h
+++ b/code/numpy/io/io.h
@@ -12,5 +12,6 @@
 #define _ULAB_IO_
 
 MP_DECLARE_CONST_FUN_OBJ_2(io_save_obj);
+MP_DECLARE_CONST_FUN_OBJ_1(io_load_obj);
 
 #endif
\ No newline at end of file
diff --git a/code/numpy/numpy.c b/code/numpy/numpy.c
index fe38c29..1e593b5 100644
--- a/code/numpy/numpy.c
+++ b/code/numpy/numpy.c
@@ -257,6 +257,9 @@ static const mp_rom_map_elem_t ulab_numpy_globals_table[] = {
     #if ULAB_NUMPY_HAS_FLIP
         { MP_OBJ_NEW_QSTR(MP_QSTR_flip), (mp_obj_t)&numerical_flip_obj },
     #endif
+    #if ULAB_NUMPY_HAS_LOAD
+        { MP_OBJ_NEW_QSTR(MP_QSTR_load), (mp_obj_t)&io_load_obj },
+    #endif
     #if ULAB_NUMPY_HAS_MINMAX
         { MP_OBJ_NEW_QSTR(MP_QSTR_max), (mp_obj_t)&numerical_max_obj },
     #endif
diff --git a/code/ulab.h b/code/ulab.h
index 4446e52..a782da2 100644
--- a/code/ulab.h
+++ b/code/ulab.h
@@ -462,6 +462,10 @@
 #define ULAB_NUMPY_HAS_INTERP           (1)
 #endif
 
+#ifndef ULAB_NUMPY_HAS_LOAD
+#define ULAB_NUMPY_HAS_LOAD             (1)
+#endif
+
 #ifndef ULAB_NUMPY_HAS_MEAN
 #define ULAB_NUMPY_HAS_MEAN             (1)
 #endif
diff --git a/docs/manual/source/conf.py b/docs/manual/source/conf.py
index 1275760..44f0134 100644
--- a/docs/manual/source/conf.py
+++ b/docs/manual/source/conf.py
@@ -27,7 +27,7 @@ copyright = '2019-2022, Zoltán Vörös and contributors'
 author = 'Zoltán Vörös'
 
 # The full version, including alpha/beta/rc tags
-release = '4.2.0'
+release = '4.3.0'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/docs/manual/source/numpy-functions.rst b/docs/manual/source/numpy-functions.rst
index a18e12c..a9a8111 100644
--- a/docs/manual/source/numpy-functions.rst
+++ b/docs/manual/source/numpy-functions.rst
@@ -11,7 +11,7 @@ the firmware was compiled with complex support.
 3.  `numpy.argmax <#argmax>`__
 4.  `numpy.argmin <#argmin>`__
 5.  `numpy.argsort <#argsort>`__
-6.  `numpy.asarray <#asarray>`__
+6.  `numpy.asarray\* <#asarray>`__
 7.  `numpy.clip <#clip>`__
 8.  `numpy.compress\* <#compress>`__
 9.  `numpy.conjugate\* <#conjugate>`__
@@ -25,25 +25,27 @@ the firmware was compiled with complex support.
 17. `numpy.interp <#interp>`__
 18. `numpy.isfinite <#isfinite>`__
 19. `numpy.isinf <#isinf>`__
-20. `numpy.max <#max>`__
-21. `numpy.maximum <#maximum>`__
-22. `numpy.mean <#mean>`__
-23. `numpy.median <#median>`__
-24. `numpy.min <#min>`__
-25. `numpy.minimum <#minimum>`__
-26. `numpy.not_equal <#equal>`__
-27. `numpy.polyfit <#polyfit>`__
-28. `numpy.polyval <#polyval>`__
-29. `numpy.real\* <#real>`__
-30. `numpy.roll <#roll>`__
-31. `numpy.size <#size>`__
-32. `numpy.sort <#sort>`__
-33. `numpy.sort_complex\* <#sort_complex>`__
-34. `numpy.std <#std>`__
-35. `numpy.sum <#sum>`__
-36. `numpy.trace <#trace>`__
-37. `numpy.trapz <#trapz>`__
-38. `numpy.where <#where>`__
+20. `numpy.load <#load>`__
+21. `numpy.max <#max>`__
+22. `numpy.maximum <#maximum>`__
+23. `numpy.mean <#mean>`__
+24. `numpy.median <#median>`__
+25. `numpy.min <#min>`__
+26. `numpy.minimum <#minimum>`__
+27. `numpy.not_equal <#equal>`__
+28. `numpy.polyfit <#polyfit>`__
+29. `numpy.polyval <#polyval>`__
+30. `numpy.real\* <#real>`__
+31. `numpy.roll <#roll>`__
+32. `numpy.save <#save>`__
+33. `numpy.size <#size>`__
+34. `numpy.sort <#sort>`__
+35. `numpy.sort_complex\* <#sort_complex>`__
+36. `numpy.std <#std>`__
+37. `numpy.sum <#sum>`__
+38. `numpy.trace <#trace>`__
+39. `numpy.trapz <#trapz>`__
+40. `numpy.where <#where>`__
 
 all
 ---
@@ -982,6 +984,39 @@ positions, where the input is infinite. Integer types return the
     
 
 
+load
+----
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.load.html
+
+The function reads data from a file in ``numpy``\ ’s
+`platform-independent
+format <https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#module-numpy.lib.format>`__,
+and returns the generated array. If the endianness of the data in the
+file and the microcontroller differ, the bytes are automatically
+swapped.
+
+.. code::
+        
+    # code to be run in micropython
+    
+    from ulab import numpy as np
+    
+    a = np.load('a.npy')
+    print(a)
+
+.. parsed-literal::
+
+    array([[0.0, 1.0, 2.0, 3.0, 4.0],
+           [5.0, 6.0, 7.0, 8.0, 9.0],
+           [10.0, 11.0, 12.0, 13.0, 14.0],
+           [15.0, 16.0, 17.0, 18.0, 19.0],
+           [20.0, 21.0, 22.0, 23.0, 24.0]], dtype=float64)
+    
+    
+
+
 mean
 ----
 
@@ -1430,6 +1465,25 @@ Vertical rolls require two internal copies of single columns.
     
 
 
+save
+----
+
+``numpy``:
+https://docs.scipy.org/doc/numpy/reference/generated/numpy.save.html
+
+With the help of this function, numerical arrays can be saved in
+``numpy``\ ’s `platform-independent
+format <https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#module-numpy.lib.format>`__.
+
+The function takes two positional arguments, the name of the output
+file, and the array.
+
+.. code::
+
+    # code to be run in CPython
+    
+    a = np.array(range(25)).reshape((5, 5))
+    np.save('a.npy', a)
 size
 ----
 
diff --git a/docs/numpy-functions.ipynb b/docs/numpy-functions.ipynb
index 815e4be..92660e2 100644
--- a/docs/numpy-functions.ipynb
+++ b/docs/numpy-functions.ipynb
@@ -34,8 +34,8 @@
    "execution_count": 1,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-01-14T19:55:15.200755Z",
-     "start_time": "2022-01-14T19:55:15.193656Z"
+     "end_time": "2022-01-15T08:50:03.152522Z",
+     "start_time": "2022-01-15T08:50:03.141317Z"
     }
    },
    "outputs": [],
@@ -52,8 +52,8 @@
    "execution_count": 2,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-01-14T19:55:17.871864Z",
-     "start_time": "2022-01-14T19:55:17.858935Z"
+     "end_time": "2022-01-15T08:50:04.183008Z",
+     "start_time": "2022-01-15T08:50:04.162758Z"
     }
    },
    "outputs": [],
@@ -237,7 +237,7 @@
     "1. [numpy.argmax](#argmax)\n",
     "1. [numpy.argmin](#argmin)\n",
     "1. [numpy.argsort](#argsort)\n",
-    "1. [numpy.asarray](#asarray)\n",
+    "1. [numpy.asarray*](#asarray)\n",
     "1. [numpy.clip](#clip)\n",
     "1. [numpy.compress*](#compress)\n",
     "1. [numpy.conjugate*](#conjugate)\n",
@@ -251,6 +251,7 @@
     "1. [numpy.interp](#interp)\n",
     "1. [numpy.isfinite](#isfinite)\n",
     "1. [numpy.isinf](#isinf)\n",
+    "1. [numpy.load](#load)\n",
     "1. [numpy.max](#max)\n",
     "1. [numpy.maximum](#maximum)\n",
     "1. [numpy.mean](#mean)\n",
@@ -262,6 +263,7 @@
     "1. [numpy.polyval](#polyval)\n",
     "1. [numpy.real*](#real)\n",
     "1. [numpy.roll](#roll)\n",
+    "1. [numpy.save](#save)\n",
     "1. [numpy.size](#size)\n",
     "1. [numpy.sort](#sort)\n",
     "1. [numpy.sort_complex*](#sort_complex)\n",
@@ -1418,6 +1420,50 @@
     "print('\\nisinf(c):\\n', np.isinf(c))"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## load\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.load.html\n",
+    "\n",
+    "The function reads data from a file in `numpy`'s [platform-independent format](https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#module-numpy.lib.format), and returns the generated array. If the endianness of the data in the file and the microcontroller differ, the bytes are automatically swapped."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-12T19:11:10.361592Z",
+     "start_time": "2022-01-12T19:11:10.342439Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "array([[0.0, 1.0, 2.0, 3.0, 4.0],\n",
+      "       [5.0, 6.0, 7.0, 8.0, 9.0],\n",
+      "       [10.0, 11.0, 12.0, 13.0, 14.0],\n",
+      "       [15.0, 16.0, 17.0, 18.0, 19.0],\n",
+      "       [20.0, 21.0, 22.0, 23.0, 24.0]], dtype=float64)\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%micropython -unix 1\n",
+    "\n",
+    "from ulab import numpy as np\n",
+    "\n",
+    "a = np.load('a.npy')\n",
+    "print(a)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -1999,6 +2045,34 @@
     "print(\"\\na rolled with None:\\n\", a)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## save\n",
+    "\n",
+    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.save.html\n",
+    "\n",
+    "With the help of this function, numerical arrays can be saved in `numpy`'s [platform-independent format](https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#module-numpy.lib.format).\n",
+    "\n",
+    "The function takes two positional arguments, the name of the output file, and the array. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-01-15T08:51:08.827144Z",
+     "start_time": "2022-01-15T08:51:08.813813Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "a = np.array(range(25)).reshape((5, 5))\n",
+    "np.save('a.npy', a)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -2013,8 +2087,8 @@
    "execution_count": 3,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-01-14T19:58:44.044501Z",
-     "start_time": "2022-01-14T19:58:44.034585Z"
+     "end_time": "2022-01-15T08:50:57.254168Z",
+     "start_time": "2022-01-15T08:50:57.245772Z"
     }
    },
    "outputs": [
diff --git a/docs/ulab-convert.ipynb b/docs/ulab-convert.ipynb
index 3da26ed..389ec55 100644
--- a/docs/ulab-convert.ipynb
+++ b/docs/ulab-convert.ipynb
@@ -17,8 +17,8 @@
    "execution_count": 1,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-01-14T19:54:52.171096Z",
-     "start_time": "2022-01-14T19:54:52.162815Z"
+     "end_time": "2022-01-15T08:48:23.883953Z",
+     "start_time": "2022-01-15T08:48:23.877040Z"
     }
    },
    "outputs": [
@@ -61,7 +61,7 @@
     "author = 'Zoltán Vörös'\n",
     "\n",
     "# The full version, including alpha/beta/rc tags\n",
-    "release = '4.2.0'\n",
+    "release = '4.3.0'\n",
     "\n",
     "\n",
     "# -- General configuration ---------------------------------------------------\n",
@@ -218,8 +218,8 @@
    "execution_count": 2,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-01-14T20:05:37.425494Z",
-     "start_time": "2022-01-14T20:05:35.620545Z"
+     "end_time": "2022-01-15T08:48:32.207113Z",
+     "start_time": "2022-01-15T08:48:32.051714Z"
     }
    },
    "outputs": [],
@@ -256,11 +256,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-01-14T20:06:04.832792Z",
-     "start_time": "2022-01-14T20:06:00.259738Z"
+     "end_time": "2022-01-15T08:52:20.686225Z",
+     "start_time": "2022-01-15T08:52:16.125014Z"
     }
    },
    "outputs": [],
diff --git a/tests/2d/numpy/load_save.py b/tests/2d/numpy/load_save.py
new file mode 100644
index 0000000..6fb9d2a
--- /dev/null
+++ b/tests/2d/numpy/load_save.py
@@ -0,0 +1,14 @@
+try:
+    from ulab import numpy as np
+except:
+    import numpy as np
+
+dtypes = (np.uint8, np.int8, np.uint16, np.int16, np.float)
+
+for dtype in dtypes:
+    a = np.array(range(25), dtype=dtype)
+    b = a.reshape((5, 5))
+    np.save('out.npy', a)
+    print(np.load('out.npy'))
+    np.save('out.npy', b)
+    print(np.load('out.npy'))
diff --git a/tests/2d/numpy/load_save.py.exp b/tests/2d/numpy/load_save.py.exp
new file mode 100644
index 0000000..71ca601
--- /dev/null
+++ b/tests/2d/numpy/load_save.py.exp
@@ -0,0 +1,30 @@
+array([0, 1, 2, ..., 22, 23, 24], dtype=uint8)
+array([[0, 1, 2, 3, 4],
+       [5, 6, 7, 8, 9],
+       [10, 11, 12, 13, 14],
+       [15, 16, 17, 18, 19],
+       [20, 21, 22, 23, 24]], dtype=uint8)
+array([0, 1, 2, ..., 22, 23, 24], dtype=int8)
+array([[0, 1, 2, 3, 4],
+       [5, 6, 7, 8, 9],
+       [10, 11, 12, 13, 14],
+       [15, 16, 17, 18, 19],
+       [20, 21, 22, 23, 24]], dtype=int8)
+array([0, 1, 2, ..., 22, 23, 24], dtype=uint16)
+array([[0, 1, 2, 3, 4],
+       [5, 6, 7, 8, 9],
+       [10, 11, 12, 13, 14],
+       [15, 16, 17, 18, 19],
+       [20, 21, 22, 23, 24]], dtype=uint16)
+array([0, 1, 2, ..., 22, 23, 24], dtype=int16)
+array([[0, 1, 2, 3, 4],
+       [5, 6, 7, 8, 9],
+       [10, 11, 12, 13, 14],
+       [15, 16, 17, 18, 19],
+       [20, 21, 22, 23, 24]], dtype=int16)
+array([0.0, 1.0, 2.0, ..., 22.0, 23.0, 24.0], dtype=float64)
+array([[0.0, 1.0, 2.0, 3.0, 4.0],
+       [5.0, 6.0, 7.0, 8.0, 9.0],
+       [10.0, 11.0, 12.0, 13.0, 14.0],
+       [15.0, 16.0, 17.0, 18.0, 19.0],
+       [20.0, 21.0, 22.0, 23.0, 24.0]], dtype=float64)

From b9d46c7f92a58d5cf62c2df1d9e5660874cda0a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Wed, 12 Jan 2022 21:24:19 +0100
Subject: [PATCH 18/20] try to fix uninitialised variable issue

---
 code/numpy/io/io.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index f44bde2..0923812 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -21,8 +21,9 @@
 
 #define ULAB_IO_BUFFER_SIZE         128
 
-#define ULAB_IO_LITTLE_ENDIAN       0
-#define ULAB_IO_BIG_ENDIAN          1
+#define ULAB_IO_NULL_ENDIAN         0
+#define ULAB_IO_LITTLE_ENDIAN       1
+#define ULAB_IO_BIG_ENDIAN          2
 
 #if ULAB_NUMPY_HAS_LOAD
 static void io_read_(mp_obj_t npy, const mp_stream_p_t *fin, char *buffer, char *string, uint16_t len, int *error) {
@@ -80,7 +81,7 @@ static mp_obj_t io_load(mp_obj_t file) {
     uint8_t dtype;
 
     io_read_(npy, fin, buffer, NULL, 1, &error);
-    uint8_t endianness;
+    uint8_t endianness = ULAB_IO_NULL_ENDIAN;
     if(*buffer == '<') {
         endianness = ULAB_IO_LITTLE_ENDIAN;
     } else if(*buffer == '>') {
@@ -171,8 +172,7 @@ static mp_obj_t io_load(mp_obj_t file) {
 
     // strip the rest of the header
     if((bytes_to_read + 51) < header_length) {
-
-        io_read_(npy, fin, buffer, NULL, 1, &error);
+        io_read_(npy, fin, buffer, NULL, header_length - (bytes_to_read + 51), &error);
     }
 
     ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(ndim, shape, dtype);

From 97d2c2c3c5a44271cf0899d5e1ea5755b1152486 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Fri, 14 Jan 2022 14:01:39 +0100
Subject: [PATCH 19/20] always close file before exception, change exception
 type

---
 code/numpy/io/io.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index 0923812..527e35d 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -109,16 +109,17 @@ static mp_obj_t io_load(mp_obj_t file) {
     #endif
     #if ULAB_SUPPORTS_COMPLEX
     #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
-    else if(memcmp(buffer, "c4", 2) == 0) {
+    else if(memcmp(buffer, "c8", 2) == 0) {
         dtype = NDARRAY_COMPLEX;
     }
     #else
-    else if(memcmp(buffer, "c8", 2) == 0) {
+    else if(memcmp(buffer, "c16", 3) == 0) {
         dtype = NDARRAY_COMPLEX;
     }
     #endif
     #endif /* ULAB_SUPPORT_COPMLEX */
     else {
+        fin->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
         mp_raise_TypeError(translate("wrong dtype"));
     }
 
@@ -162,6 +163,7 @@ static mp_obj_t io_load(mp_obj_t file) {
                 break;
             }
             else {
+                fin->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
                 mp_raise_ValueError(translate("corrupted file"));
             }
             needle++;
@@ -180,6 +182,7 @@ static mp_obj_t io_load(mp_obj_t file) {
 
     size_t read = fin->read(npy, array, ndarray->len * ndarray->itemsize, &error);
     if(read != ndarray->len * ndarray->itemsize) {
+        fin->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
         mp_raise_ValueError(translate("corrupted file"));
     }
 
@@ -287,9 +290,9 @@ static mp_obj_t io_save(mp_obj_t file, mp_obj_t ndarray_) {
             #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
             memcpy(buffer+offset, "c8", 2);
             #else
-            memcpy(buffer+offset, "c16", 2);
-            #endif
+            memcpy(buffer+offset, "c16", 3);
             offset++;
+            #endif
             break;
         #endif
     }

From 024b3ea6e8e7b23cd5d3a2b7e7a32c6797238a20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= <zvoros@gmail.com>
Date: Fri, 14 Jan 2022 14:10:04 +0100
Subject: [PATCH 20/20] change npy, fin, fout to stream, stream_p for
 consistency

---
 code/numpy/io/io.c | 56 +++++++++++++++++++++++-----------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c
index 527e35d..4beefa1 100644
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@@ -26,8 +26,8 @@
 #define ULAB_IO_BIG_ENDIAN          2
 
 #if ULAB_NUMPY_HAS_LOAD
-static void io_read_(mp_obj_t npy, const mp_stream_p_t *fin, char *buffer, char *string, uint16_t len, int *error) {
-    size_t read = fin->read(npy, buffer, len, error);
+static void io_read_(mp_obj_t stream, const mp_stream_p_t *stream_p, char *buffer, char *string, uint16_t len, int *error) {
+    size_t read = stream_p->read(stream, buffer, len, error);
     bool fail = false;
     if(read == len) {
         if(string != NULL) {
@@ -39,8 +39,8 @@ static void io_read_(mp_obj_t npy, const mp_stream_p_t *fin, char *buffer, char
         fail = true;
     }
     if(fail) {
-        fin->ioctl(npy, MP_STREAM_CLOSE, 0, error);
-        mp_raise_ValueError(translate("corrupted file"));
+        stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, error);
+        mp_raise_msg(&mp_type_RuntimeError, translate("corrupted file"));
     }
 }
 
@@ -61,26 +61,26 @@ static mp_obj_t io_load(mp_obj_t file) {
         MP_OBJ_NEW_QSTR(MP_QSTR_rb)
     };
 
-    mp_obj_t npy = mp_builtin_open(2, open_args, (mp_map_t *)&mp_const_empty_map);
-    const mp_stream_p_t *fin = mp_get_stream(npy);
+    mp_obj_t stream = mp_builtin_open(2, open_args, (mp_map_t *)&mp_const_empty_map);
+    const mp_stream_p_t *stream_p = mp_get_stream(stream);
 
     // read header
     // magic string
-    io_read_(npy, fin, buffer, "\x93NUMPY", 6, &error);
+    io_read_(stream, stream_p, buffer, "\x93NUMPY", 6, &error);
     // simply discard the version number
-    io_read_(npy, fin, buffer, NULL, 2, &error);
+    io_read_(stream, stream_p, buffer, NULL, 2, &error);
     // header length, represented as a little endian uint16 (0x76, 0x00)
-    io_read_(npy, fin, buffer, NULL, 2, &error);
+    io_read_(stream, stream_p, buffer, NULL, 2, &error);
 
     uint16_t header_length = buffer[1];
     header_length <<= 8;
     header_length += buffer[0];
 
     // beginning of the dictionary describing the array
-    io_read_(npy, fin, buffer, "{'descr': '", 11, &error);
+    io_read_(stream, stream_p, buffer, "{'descr': '", 11, &error);
     uint8_t dtype;
 
-    io_read_(npy, fin, buffer, NULL, 1, &error);
+    io_read_(stream, stream_p, buffer, NULL, 1, &error);
     uint8_t endianness = ULAB_IO_NULL_ENDIAN;
     if(*buffer == '<') {
         endianness = ULAB_IO_LITTLE_ENDIAN;
@@ -88,7 +88,7 @@ static mp_obj_t io_load(mp_obj_t file) {
         endianness = ULAB_IO_BIG_ENDIAN;
     }
 
-    io_read_(npy, fin, buffer, NULL, 2, &error);
+    io_read_(stream, stream_p, buffer, NULL, 2, &error);
     if(memcmp(buffer, "u1", 2) == 0) {
         dtype = NDARRAY_UINT8;
     } else if(memcmp(buffer, "i1", 2) == 0) {
@@ -119,11 +119,11 @@ static mp_obj_t io_load(mp_obj_t file) {
     #endif
     #endif /* ULAB_SUPPORT_COPMLEX */
     else {
-        fin->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
+        stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
         mp_raise_TypeError(translate("wrong dtype"));
     }
 
-    io_read_(npy, fin, buffer, "', 'fortran_order': False, 'shape': (", 37, &error);
+    io_read_(stream, stream_p, buffer, "', 'fortran_order': False, 'shape': (", 37, &error);
 
     size_t *shape = m_new(size_t, ULAB_MAX_DIMS);
     memset(shape, 0, sizeof(size_t) * ULAB_MAX_DIMS);
@@ -131,7 +131,7 @@ static mp_obj_t io_load(mp_obj_t file) {
     uint16_t bytes_to_read = MIN(ULAB_IO_BUFFER_SIZE, header_length - 51);
     // bytes_to_read is 128 at most. This should be enough to contain a
     // maximum of 4 size_t numbers plus the delimiters
-    io_read_(npy, fin, buffer, NULL, bytes_to_read, &error);
+    io_read_(stream, stream_p, buffer, NULL, bytes_to_read, &error);
     char *needle = buffer;
     uint8_t ndim = 0;
 
@@ -163,8 +163,8 @@ static mp_obj_t io_load(mp_obj_t file) {
                 break;
             }
             else {
-                fin->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
-                mp_raise_ValueError(translate("corrupted file"));
+                stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
+                mp_raise_msg(&mp_type_RuntimeError, translate("corrupted file"));
             }
             needle++;
         }
@@ -174,19 +174,19 @@ static mp_obj_t io_load(mp_obj_t file) {
 
     // strip the rest of the header
     if((bytes_to_read + 51) < header_length) {
-        io_read_(npy, fin, buffer, NULL, header_length - (bytes_to_read + 51), &error);
+        io_read_(stream, stream_p, buffer, NULL, header_length - (bytes_to_read + 51), &error);
     }
 
     ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(ndim, shape, dtype);
     char *array = (char *)ndarray->array;
 
-    size_t read = fin->read(npy, array, ndarray->len * ndarray->itemsize, &error);
+    size_t read = stream_p->read(stream, array, ndarray->len * ndarray->itemsize, &error);
     if(read != ndarray->len * ndarray->itemsize) {
-        fin->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
-        mp_raise_ValueError(translate("corrupted file"));
+        stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
+        mp_raise_msg(&mp_type_RuntimeError, translate("corrupted file"));
     }
 
-    fin->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
+    stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
     m_del(char, buffer, ULAB_IO_BUFFER_SIZE);
 
     // swap the bytes, if necessary
@@ -250,8 +250,8 @@ static mp_obj_t io_save(mp_obj_t file, mp_obj_t ndarray_) {
         MP_OBJ_NEW_QSTR(MP_QSTR_wb)
     };
 
-    mp_obj_t npy = mp_builtin_open(2, open_args, (mp_map_t *)&mp_const_empty_map);
-    const mp_stream_p_t *fout = mp_get_stream(npy);
+    mp_obj_t stream = mp_builtin_open(2, open_args, (mp_map_t *)&mp_const_empty_map);
+    const mp_stream_p_t *stream_p = mp_get_stream(stream);
 
     // write header;
     // magic string + header length, which is always 128 - 10 = 118, represented as a little endian uint16 (0x76, 0x00)
@@ -314,7 +314,7 @@ static mp_obj_t io_save(mp_obj_t file, mp_obj_t ndarray_) {
     // pad with space till the very end
     memset(buffer+offset, 32, ULAB_IO_BUFFER_SIZE - offset - 1);
     buffer[ULAB_IO_BUFFER_SIZE - 1] = '\n';
-    fout->write(npy, buffer, ULAB_IO_BUFFER_SIZE, &error);
+    stream_p->write(stream, buffer, ULAB_IO_BUFFER_SIZE, &error);
 
     // write the array data
     uint8_t sz = ndarray->itemsize;
@@ -339,7 +339,7 @@ static mp_obj_t io_save(mp_obj_t file, mp_obj_t ndarray_) {
                     memcpy(buffer+offset, array, sz);
                     offset += sz;
                     if(offset == ULAB_IO_BUFFER_SIZE) {
-                        fout->write(npy, buffer, offset, &error);
+                        stream_p->write(stream, buffer, offset, &error);
                         offset = 0;
                     }
                     array += ndarray->strides[ULAB_MAX_DIMS - 1];
@@ -364,8 +364,8 @@ static mp_obj_t io_save(mp_obj_t file, mp_obj_t ndarray_) {
     } while(i <  ndarray->shape[ULAB_MAX_DIMS - 4]);
     #endif
 
-    fout->write(npy, buffer, offset, &error);
-    fout->ioctl(npy, MP_STREAM_CLOSE, 0, &error);
+    stream_p->write(stream, buffer, offset, &error);
+    stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
 
     m_del(char, buffer, ULAB_IO_BUFFER_SIZE);
     return mp_const_none;