circuitpython-ulab/docs/ulab.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-03T21:07:14.683785Z",
     "start_time": "2019-12-03T21:07:13.416266Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Populating the interactive namespace from numpy and matplotlib\n"
     ]
    }
   ],
   "source": [
    "%pylab inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Environmental settings and magic commands"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-31T10:01:42.188080Z",
     "start_time": "2019-12-31T10:01:42.180823Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[Errno 2] No such file or directory: '../../micropython/ports/unix/'\n",
      "/home/v923z/sandbox/micropython/v1.11/micropython/ports/unix\n"
     ]
    }
   ],
   "source": [
    "%cd ../../micropython/ports/unix/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-31T10:01:45.226936Z",
     "start_time": "2019-12-31T10:01:45.212099Z"
    }
   },
   "outputs": [],
   "source": [
    "from IPython.core.magic import Magics, magics_class, line_cell_magic\n",
    "from IPython.core.magic import cell_magic, register_cell_magic, register_line_magic\n",
    "from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring\n",
    "import subprocess\n",
    "import os"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## micropython magic command\n",
    "\n",
    "The following magic class takes the content of a cell, and depending on the arguments, either passes it to the unix, or the stm32 implementation. In the latter case, a pyboard must be connected to the computer, and must be initialised beforehand. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-31T10:01:49.824012Z",
     "start_time": "2019-12-31T10:01:49.794627Z"
    }
   },
   "outputs": [],
   "source": [
    "@magics_class\n",
    "class PyboardMagic(Magics):\n",
    "    @cell_magic\n",
    "    @magic_arguments()\n",
    "    @argument('-skip')\n",
    "    @argument('-unix')\n",
    "    @argument('-file')\n",
    "    @argument('-data')\n",
    "    @argument('-time')\n",
    "    @argument('-memory')\n",
    "    def micropython(self, line='', cell=None):\n",
    "        args = parse_argstring(self.micropython, line)\n",
    "        if args.skip: # doesn't care about the cell's content\n",
    "            print('skipped execution')\n",
    "            return None # do not parse the rest\n",
    "        if args.unix: # tests the code on the unix port. Note that this works on unix only\n",
    "            with open('/dev/shm/micropython.py', 'w') as fout:\n",
    "                fout.write(cell)\n",
    "            proc = subprocess.Popen([\"./micropython\", \"/dev/shm/micropython.py\"], \n",
    "                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
    "            print(proc.stdout.read().decode(\"utf-8\"))\n",
    "            print(proc.stderr.read().decode(\"utf-8\"))\n",
    "            return None\n",
    "        if args.file: # can be used to copy the cell content onto the pyboard's flash\n",
    "            spaces = \"    \"\n",
    "            try:\n",
    "                with open(args.file, 'w') as fout:\n",
    "                    fout.write(cell.replace('\\t', spaces))\n",
    "                    print('written cell to {}'.format(args.file))\n",
    "            except:\n",
    "                print('Failed to write to disc!')\n",
    "            return None # do not parse the rest\n",
    "        if args.data: # can be used to load data from the pyboard directly into kernel space\n",
    "            message = pyb.exec(cell)\n",
    "            if len(message) == 0:\n",
    "                print('pyboard >>>')\n",
    "            else:\n",
    "                print(message.decode('utf-8'))\n",
    "                # register new variable in user namespace\n",
    "                self.shell.user_ns[args.data] = string_to_matrix(message.decode(\"utf-8\"))\n",
    "        \n",
    "        if args.time: # measures the time of executions\n",
    "            pyb.exec('import utime')\n",
    "            message = pyb.exec('t = utime.ticks_us()\\n' + cell + '\\ndelta = utime.ticks_diff(utime.ticks_us(), t)' + \n",
    "                               \"\\nprint('execution time: {:d} us'.format(delta))\")\n",
    "            print(message.decode('utf-8'))\n",
    "        \n",
    "        if args.memory: # prints out memory information \n",
    "            message = pyb.exec('from micropython import mem_info\\nprint(mem_info())\\n')\n",
    "            print(\"memory before execution:\\n========================\\n\", message.decode('utf-8'))\n",
    "            message = pyb.exec(cell)\n",
    "            print(\">>> \", message.decode('utf-8'))\n",
    "            message = pyb.exec('print(mem_info())')\n",
    "            print(\"memory after execution:\\n========================\\n\", message.decode('utf-8'))\n",
    "\n",
    "        else:\n",
    "            message = pyb.exec(cell)\n",
    "            print(message.decode('utf-8'))\n",
    "\n",
    "ip = get_ipython()\n",
    "ip.register_magics(PyboardMagic)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Code magic\n",
    "\n",
    "The following cell magic simply writes a licence header, and the contents of the cell to the file given in the header of the cell. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-31T10:01:54.560886Z",
     "start_time": "2019-12-31T10:01:54.544712Z"
    }
   },
   "outputs": [],
   "source": [
    "@magics_class\n",
    "class MyMagics(Magics):\n",
    "        \n",
    "    @cell_magic\n",
    "    def ccode(self, line, cell):\n",
    "        copyright = \"\"\"/*\n",
    " * This file is part of the micropython-ulab project, \n",
    " *\n",
    " * https://github.com/v923z/micropython-ulab\n",
    " *\n",
    " * The MIT License (MIT)\n",
    " *\n",
    " * Copyright (c) 2019 Zoltán Vörös\n",
    "*/\n",
    "    \"\"\"\n",
    "        if line:\n",
    "            with open('../../../ulab/code/'+line, 'w') as cout:\n",
    "                cout.write(copyright)\n",
    "                cout.write(cell)\n",
    "            print('written %d bytes to %s'%(len(copyright) + len(cell), line))\n",
    "            return None\n",
    "\n",
    "ip = get_ipython()\n",
    "ip.register_magics(MyMagics)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# The ndarray type"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## General comments\n",
    "\n",
    "`ndarrays` are efficient containers of numerical data of the same type (i.e., signed/unsigned chars, signed/unsigned integers or `float`s, which, depending on the platform, can also be `double`s). Beyond storing the actual data, the type definition has a couple of additional members (on top of the `base` type). Namely, the number of dimensions (`size_t`), the shape (`*size_t`), the strides (`*int32_t`), the offset of the data pointer (`size_t`), the total length of the array (`size_t`), and a `uint8_t`, which determines, whether the arrays is to be treated as a set of Booleans.\n",
    "The type definition is as follows:\n",
    "\n",
    "```c\n",
    "typedef struct _ndarray_obj_t {\n",
    "    mp_obj_base_t base;\n",
    "    uint8_t boolean;\n",
    "    uint8_t ndim;\n",
    "    size_t *shape;\n",
    "    int32_t *strides;\n",
    "    size_t offset;\n",
    "    size_t len;\n",
    "    mp_obj_array_t *array;\n",
    "} ndarray_obj_t;\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ndarray.h"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-31T10:02:00.293394Z",
     "start_time": "2019-12-31T10:02:00.281214Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "written 4471 bytes to ndarray.h\n"
     ]
    }
   ],
   "source": [
    "%%ccode ndarray.h\n",
    "\n",
    "#ifndef _NDARRAY_\n",
    "#define _NDARRAY_\n",
    "\n",
    "#include \"py/objarray.h\"\n",
    "#include \"py/binary.h\"\n",
    "#include \"py/objstr.h\"\n",
    "#include \"py/objlist.h\"\n",
    "\n",
    "#define SWAP(t, a, b) { t tmp = a; a = b; b = tmp; }\n",
    "\n",
    "#define NDARRAY_NUMERIC   0\n",
    "#define NDARRAY_BOOLEAN   1\n",
    "\n",
    "#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT\n",
    "#define FLOAT_TYPECODE 'f'\n",
    "#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE\n",
    "#define FLOAT_TYPECODE 'd'\n",
    "#endif\n",
    "\n",
    "extern const mp_obj_type_t ulab_ndarray_type;\n",
    "\n",
    "enum NDARRAY_TYPE {\n",
    "    NDARRAY_BOOL = '?', // this must never be assigned to the typecode!\n",
    "    NDARRAY_UINT8 = 'B',\n",
    "    NDARRAY_INT8 = 'b',\n",
    "    NDARRAY_UINT16 = 'H', \n",
    "    NDARRAY_INT16 = 'h',\n",
    "    NDARRAY_FLOAT = FLOAT_TYPECODE,\n",
    "};\n",
    "\n",
    "typedef struct _ndarray_obj_t {\n",
    "    mp_obj_base_t base;\n",
    "    uint8_t boolean;\n",
    "    uint8_t ndim;\n",
    "    size_t *shape;\n",
    "    int32_t *strides;\n",
    "    size_t len;\n",
    "    size_t offset;\n",
    "    mp_obj_array_t *array;\n",
    "} ndarray_obj_t;\n",
    "\n",
    "// this is a helper structure, so that we can return shape AND strides from a function\n",
    "typedef struct _ndarray_header_obj_t {\n",
    "    size_t *shape;\n",
    "    int32_t *strides;\n",
    "    int8_t axis;\n",
    "    size_t offset;\n",
    "} ndarray_header_obj_t;\n",
    "\n",
    "// various helper functions\n",
    "size_t ndarray_index_from_flat(size_t , ndarray_obj_t *, int32_t *);\n",
    "size_t ndarray_index_from_contracted(size_t  , ndarray_obj_t * , int32_t * , uint8_t  , uint8_t  );\n",
    "mp_float_t ndarray_get_float_value(void *, uint8_t , size_t );\n",
    "\n",
    "// calculates the index (in the original linear array) of an item, if the index in the flat array is given\n",
    "// this is the macro equivalent of ndarray_index_from_flat()\n",
    "// TODO: This fails, when the last stride is not 1!!!\n",
    "#define NDARRAY_INDEX_FROM_FLAT(ndarray, shape_strides, index, _tindex, _nindex) do {\\\n",
    "    size_t Q;\\\n",
    "    (_tindex) = (index);\\\n",
    "    (_nindex) = (ndarray)->offset;\\\n",
    "    for(size_t _x=0; _x < (ndarray)->ndim; _x++) {\\\n",
    "        Q = (_tindex) / (shape_strides)[_x];\\\n",
    "        (_tindex) -= Q * (shape_strides)[_x];\\\n",
    "        (_nindex) += Q * (ndarray)->strides[_x];\\\n",
    "    }\\\n",
    "} while(0)\n",
    "\n",
    "#define NDARRAY_INDEX_FROM_FLAT2(ndarray, stride_array, shape_strides, index, _tindex, _nindex) do {\\\n",
    "    size_t Q;\\\n",
    "    (_tindex) = (index);\\\n",
    "    (_nindex) = (ndarray)->offset;\\\n",
    "    for(size_t _x=0; _x < (ndarray)->ndim; _x++) {\\\n",
    "        Q = (_tindex) / (shape_strides)[_x];\\\n",
    "        (_tindex) -= Q * (shape_strides)[_x];\\\n",
    "        (_nindex) += Q * (stride_array)[_x];\\\n",
    "    }\\\n",
    "} while(0)\n",
    "    \n",
    "#define CREATE_SINGLE_ITEM(ndarray, type, typecode, value) do {\\\n",
    "    (ndarray) = ndarray_new_linear_array(1, (typecode));\\\n",
    "    type *tmparr = (type *)(ndarray)->array->items;\\\n",
    "    tmparr[0] = (type)(value);\\\n",
    "} while(0)\n",
    "\n",
    "#define RUN_BINARY_LOOP(ndarray, typecode, type_out, type_left, type_right, lhs, rhs, shape, ndim, operator) do {\\\n",
    "    uint8_t *left = (uint8_t *)(lhs)->array->items;\\\n",
    "    uint8_t *right = (uint8_t *)(rhs)->array->items;\\\n",
    "    (ndarray) = ndarray_new_dense_ndarray((ndim), (shape), (typecode));\\\n",
    "    uint8_t size_left = sizeof(type_left), size_right = sizeof(type_right);\\\n",
    "    type_out *out = (type_out *)ndarray->array->items;\\\n",
    "    for(size_t i=0; i < (lhs)->len; i++) {\\\n",
    "        out[i] = *(type_left *)left + *(type_right *)right;\\\n",
    "        left += size_left; right += size_right;\\\n",
    "    }\\\n",
    "} while(0)\n",
    "\n",
    "mp_obj_t mp_obj_new_ndarray_iterator(mp_obj_t , size_t , mp_obj_iter_buf_t *);\n",
    "void ndarray_print(const mp_print_t *, mp_obj_t , mp_print_kind_t );\n",
    "ndarray_obj_t *ndarray_new_ndarray(uint8_t , size_t *, int32_t *, uint8_t );\n",
    "ndarray_obj_t *ndarray_new_dense_ndarray(uint8_t , size_t *, uint8_t );\n",
    "ndarray_obj_t *ndarray_new_linear_array(size_t , uint8_t );\n",
    "ndarray_obj_t *ndarray_copy_view(ndarray_obj_t *, uint8_t );\n",
    "\n",
    "mp_obj_t ndarray_copy(mp_obj_t );\n",
    "mp_obj_t ndarray_make_new(const mp_obj_type_t *, size_t , size_t , const mp_obj_t *);\n",
    "mp_obj_t ndarray_subscr(mp_obj_t , mp_obj_t , mp_obj_t );\n",
    "mp_obj_t ndarray_getiter(mp_obj_t , mp_obj_iter_buf_t *);\n",
    "mp_obj_t ndarray_binary_op(mp_binary_op_t , mp_obj_t , mp_obj_t );\n",
    "mp_obj_t ndarray_unary_op(mp_unary_op_t , mp_obj_t );\n",
    "\n",
    "mp_obj_t ndarray_shape(mp_obj_t );\n",
    "mp_obj_t ndarray_reshape(mp_obj_t , mp_obj_t );\n",
    "mp_obj_t ndarray_transpose(mp_obj_t );\n",
    "mp_obj_t ndarray_flatten(size_t , const mp_obj_t *, mp_map_t *);\n",
    "mp_obj_t ndarray_itemsize(mp_obj_t );\n",
    "mp_obj_t ndarray_strides(mp_obj_t );\n",
    "mp_obj_t ndarray_info(mp_obj_t );\n",
    "\n",
    "#endif"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ndarray.c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 459,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-13T22:22:07.994967Z",
     "start_time": "2019-12-13T22:22:07.973676Z"
    },
    "code_folding": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "written 44617 bytes to ndarray.c\n"
     ]
    }
   ],
   "source": [
    "%%ccode ndarray.c\n",
    "\n",
    "#include <math.h>\n",
    "#include <stdio.h>\n",
    "#include <stdlib.h>\n",
    "#include <string.h>\n",
    "#include \"py/runtime.h\"\n",
    "#include \"py/binary.h\"\n",
    "#include \"py/obj.h\"\n",
    "#include \"py/objtuple.h\"\n",
    "#include \"ndarray.h\"\n",
    "\n",
    "// This function is copied verbatim from objarray.c\n",
    "STATIC mp_obj_array_t *array_new(char typecode, size_t n) {\n",
    "    int typecode_size = mp_binary_get_size('@', typecode, NULL);\n",
    "    mp_obj_array_t *o = m_new_obj(mp_obj_array_t);\n",
    "    // this step could probably be skipped: we are never going to store a bytearray per se\n",
    "    #if MICROPY_PY_BUILTINS_BYTEARRAY && MICROPY_PY_ARRAY\n",
    "    o->base.type = (typecode == BYTEARRAY_TYPECODE) ? &mp_type_bytearray : &mp_type_array;\n",
    "    #elif MICROPY_PY_BUILTINS_BYTEARRAY\n",
    "    o->base.type = &mp_type_bytearray;\n",
    "    #else\n",
    "    o->base.type = &mp_type_array;\n",
    "    #endif\n",
    "    o->typecode = typecode;\n",
    "    o->free = 0;\n",
    "    o->len = n;\n",
    "    o->items = m_new(byte, typecode_size * o->len);\n",
    "    return o;\n",
    "}\n",
    "\n",
    "// helper functions\n",
    "mp_float_t ndarray_get_float_value(void *data, uint8_t typecode, size_t index) {\n",
    "    if(typecode == NDARRAY_UINT8) {\n",
    "        return (mp_float_t)((uint8_t *)data)[index];\n",
    "    } else if(typecode == NDARRAY_INT8) {\n",
    "        return (mp_float_t)((int8_t *)data)[index];\n",
    "    } else if(typecode == NDARRAY_UINT16) {\n",
    "        return (mp_float_t)((uint16_t *)data)[index];\n",
    "    } else if(typecode == NDARRAY_INT16) {\n",
    "        return (mp_float_t)((int16_t *)data)[index];\n",
    "    } else {\n",
    "        return (mp_float_t)((mp_float_t *)data)[index];\n",
    "    }\n",
    "}\n",
    "\n",
    "size_t ndarray_index_from_contracted(size_t index, ndarray_obj_t *ndarray, int32_t *strides, uint8_t ndim, uint8_t axis) {\n",
    "    // calculates the index in the original (linear) array from the index in the contracted (linear) array\n",
    "    size_t q, new_index = 0;\n",
    "    for(size_t i=0; i <= ndim-1; i++) {\n",
    "        q = index / strides[i];\n",
    "        if(i < axis) { \n",
    "            new_index += q * ndarray->strides[i];\n",
    "        } else {\n",
    "            new_index += q * ndarray->strides[i+1];            \n",
    "        }\n",
    "        index -= q * strides[i];\n",
    "    }\n",
    "    return new_index + ndarray->offset;\n",
    "}\n",
    "\n",
    "size_t *ndarray_new_coords(uint8_t ndim) {\n",
    "    size_t *coords = m_new(size_t, ndim);\n",
    "    memset(coords, 0, ndim*sizeof(size_t));\n",
    "    return coords;\n",
    "}\n",
    "\n",
    "void ndarray_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {\n",
    "    (void)kind;\n",
    "    ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);\n",
    "    size_t offset = self->offset;\n",
    "    uint8_t print_extra = self->ndim;\n",
    "    size_t *coords = ndarray_new_coords(self->ndim);\n",
    "    \n",
    "    mp_print_str(print, \"array(\");\n",
    "        \n",
    "    for(size_t i=0; i < self->len; i++) {\n",
    "        for(uint8_t j=0; j < print_extra; j++) {\n",
    "            mp_print_str(print, \"[\");\n",
    "        }\n",
    "        print_extra = 0;\n",
    "        if(!self->boolean) {\n",
    "            mp_obj_print_helper(print, mp_binary_get_val_array(self->array->typecode, self->array->items, offset), PRINT_REPR);\n",
    "        } else {\n",
    "            if(((uint8_t *)self->array->items)[offset]) {\n",
    "                mp_print_str(print, \"True\");\n",
    "            } else {\n",
    "                mp_print_str(print, \"False\");\n",
    "            }\n",
    "        }\n",
    "        offset += self->strides[self->ndim-1];\n",
    "        coords[self->ndim-1] += 1;\n",
    "        if(coords[self->ndim-1] != self->shape[self->ndim-1]) {\n",
    "            mp_print_str(print, \", \");\n",
    "        }\n",
    "        for(uint8_t j=self->ndim-1; j > 0; j--) {\n",
    "            if(coords[j] == self->shape[j]) {\n",
    "                offset -= self->shape[j] * self->strides[j];\n",
    "                offset += self->strides[j-1];\n",
    "                print_extra += 1;\n",
    "                coords[j] = 0;\n",
    "                coords[j-1] += 1;\n",
    "                mp_print_str(print, \"]\");\n",
    "            } else { // coordinates can change only, if the last coordinate changes\n",
    "                break;\n",
    "            }\n",
    "        }\n",
    "        if(print_extra && (i != self->len-1)) {\n",
    "            mp_print_str(print, \",\\n\");\n",
    "            if(print_extra > 1) {\n",
    "                mp_print_str(print, \"\\n\");\n",
    "            }\n",
    "        }\n",
    "    }\n",
    "    mp_print_str(print, \"]\");\n",
    "    m_del(size_t, coords, self->ndim);\n",
    "\n",
    "    if(self->boolean) {\n",
    "        mp_print_str(print, \", dtype=bool)\");\n",
    "    } else if(self->array->typecode == NDARRAY_UINT8) {\n",
    "        mp_print_str(print, \", dtype=uint8)\");\n",
    "    } else if(self->array->typecode == NDARRAY_INT8) {\n",
    "        mp_print_str(print, \", dtype=int8)\");\n",
    "    } else if(self->array->typecode == NDARRAY_UINT16) {\n",
    "        mp_print_str(print, \", dtype=uint16)\");\n",
    "    } else if(self->array->typecode == NDARRAY_INT16) {\n",
    "        mp_print_str(print, \", dtype=int16)\");\n",
    "    } else if(self->array->typecode == NDARRAY_FLOAT) {\n",
    "        mp_print_str(print, \", dtype=float)\");\n",
    "    }\n",
    "}\n",
    "\n",
    "ndarray_obj_t *ndarray_new_ndarray(uint8_t ndim, size_t *shape, int32_t *strides, uint8_t typecode) {\n",
    "    // Creates the base ndarray with shape, and initialises the values to straight 0s\n",
    "    ndarray_obj_t *ndarray = m_new_obj(ndarray_obj_t);\n",
    "    ndarray->base.type = &ulab_ndarray_type;\n",
    "    ndarray->ndim = ndim;\n",
    "    ndarray->shape = shape;\n",
    "    ndarray->strides = strides;\n",
    "    ndarray->offset = 0;\n",
    "    if(typecode == NDARRAY_BOOL) {\n",
    "        typecode = NDARRAY_UINT8;\n",
    "        ndarray->boolean = NDARRAY_BOOLEAN;\n",
    "    } else {\n",
    "        ndarray->boolean = NDARRAY_NUMERIC;\n",
    "    }\n",
    "    ndarray->len = 1;\n",
    "    for(uint8_t i=0; i < ndim; i++) {\n",
    "        ndarray->len *= shape[i];\n",
    "    }\n",
    "    mp_obj_array_t *array = array_new(typecode, ndarray->len);\n",
    "    // this should set all elements to 0, irrespective of the of the typecode (all bits are zero)\n",
    "    // we could, perhaps, leave this step out, and initialise the array only, when needed\n",
    "    memset(array->items, 0, array->len); \n",
    "    ndarray->array = array;\n",
    "    return ndarray;\n",
    "}\n",
    "\n",
    "ndarray_obj_t *ndarray_new_dense_ndarray(uint8_t ndim, size_t *shape, uint8_t typecode) {\n",
    "    // creates a dense array, i.e., one, where the strides are derived directly from the shapes\n",
    "    int32_t *strides = m_new(int32_t, ndim);\n",
    "    strides[ndim-1] = 1;\n",
    "    for(size_t i=ndim-1; i > 0; i--) {\n",
    "        strides[i-1] = strides[i] * shape[i];\n",
    "    }\n",
    "    return ndarray_new_ndarray(ndim, shape, strides, typecode);\n",
    "}\n",
    "\n",
    "ndarray_obj_t *ndarray_new_view(mp_obj_array_t *array, uint8_t ndim, size_t *shape, int32_t *strides, size_t offset, uint8_t boolean) {\n",
    "    ndarray_obj_t *ndarray = m_new_obj(ndarray_obj_t);\n",
    "    ndarray->base.type = &ulab_ndarray_type;\n",
    "    ndarray->boolean = boolean;\n",
    "    ndarray->ndim = ndim;\n",
    "    ndarray->shape = shape;\n",
    "    ndarray->strides = strides;\n",
    "    ndarray->len = 1;\n",
    "    for(uint8_t i=0; i < ndim; i++) {\n",
    "        ndarray->len *= shape[i];\n",
    "    }    \n",
    "    ndarray->offset = offset;\n",
    "    ndarray->array = array;\n",
    "    return ndarray;\n",
    "}\n",
    "\n",
    "ndarray_obj_t *ndarray_copy_view(ndarray_obj_t *input, uint8_t typecode) {\n",
    "    // Creates a new ndarray from the input\n",
    "    // If the input was a sliced view, the output will inherit the shape, but not the strides\n",
    "\n",
    "    int32_t *strides = m_new(int32_t, input->ndim);\n",
    "    strides[input->ndim-1] = 1;\n",
    "    for(uint8_t i=input->ndim-1; i > 0; i--) {\n",
    "        strides[i-1] = strides[i] * input->shape[i];\n",
    "    }\n",
    "    ndarray_obj_t *ndarray = ndarray_new_ndarray(input->ndim, input->shape, strides, typecode);\n",
    "    ndarray->boolean = input->boolean;\n",
    "    \n",
    "    mp_obj_t item;\n",
    "    size_t offset = input->offset;\n",
    "    size_t *coords = ndarray_new_coords(input->ndim);\n",
    "    \n",
    "    for(size_t i=0; i < ndarray->len; i++) {\n",
    "        item = mp_binary_get_val_array(input->array->typecode, input->array->items, offset);\n",
    "        mp_binary_set_val_array(typecode, ndarray->array->items, i, item);\n",
    "        offset += input->strides[input->ndim-1];\n",
    "        coords[input->ndim-1] += 1;\n",
    "        for(uint8_t j=ndarray->ndim-1; j > 0; j--) {\n",
    "            if(coords[j] == input->shape[j]) {\n",
    "                offset -= input->shape[j] * input->strides[j];\n",
    "                offset += input->strides[j-1];\n",
    "                coords[j] = 0;\n",
    "                coords[j-1] += 1;\n",
    "            } else { // coordinates can change only, if the last coordinate changes\n",
    "                break;\n",
    "            }\n",
    "        }\n",
    "    }\n",
    "    m_del(size_t, coords, input->ndim);\n",
    "    return ndarray;\n",
    "}\n",
    "\n",
    "ndarray_obj_t *ndarray_new_linear_array(size_t len, uint8_t dtype) {\n",
    "    size_t *shape = m_new(size_t, 1);\n",
    "    int32_t *strides = m_new(int32_t, 1);\n",
    "    shape[0] = len;\n",
    "    strides[0] = 1;\n",
    "    return ndarray_new_ndarray(1, shape, strides, dtype);\n",
    "}\n",
    "\n",
    "STATIC uint8_t ndarray_init_helper(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {\n",
    "    static const mp_arg_t allowed_args[] = {\n",
    "        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_PTR(&mp_const_none_obj)} },\n",
    "        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = NDARRAY_FLOAT } },\n",
    "    };\n",
    "    \n",
    "    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];\n",
    "    mp_arg_parse_all(1, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);\n",
    "    \n",
    "    uint8_t dtype = args[1].u_int;\n",
    "    // at this point, dtype can still be `?` for Boolean arrays\n",
    "    return dtype;\n",
    "}\n",
    "\n",
    "mp_obj_t ndarray_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *args) {\n",
    "    // TODO: implement dtype, and copy keywords\n",
    "    mp_arg_check_num(n_args, n_kw, 1, 2, true);\n",
    "    mp_map_t kw_args;\n",
    "    mp_map_init_fixed_table(&kw_args, n_kw, args + n_args);\n",
    "    uint8_t dtype = ndarray_init_helper(n_args, args, &kw_args);\n",
    "\n",
    "    mp_obj_t len_in = mp_obj_len_maybe(args[0]);\n",
    "    size_t len = MP_OBJ_SMALL_INT_VALUE(len_in);\n",
    "    ndarray_obj_t *self, *ndarray;\n",
    "    \n",
    "    if(MP_OBJ_IS_TYPE(args[0], &ulab_ndarray_type)) {\n",
    "        ndarray = MP_OBJ_TO_PTR(args[0]);\n",
    "        self = ndarray_copy_view(ndarray, dtype);\n",
    "        return MP_OBJ_FROM_PTR(self);\n",
    "    }\n",
    "    // work with a single dimension for now\n",
    "    self = ndarray_new_linear_array(len, dtype);\n",
    "    \n",
    "    size_t i=0;\n",
    "    mp_obj_iter_buf_t iter_buf;\n",
    "    mp_obj_t item, iterable = mp_getiter(args[0], &iter_buf);\n",
    "    if(self->boolean) {\n",
    "        uint8_t *array = (uint8_t *)self->array->items;\n",
    "        while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {\n",
    "            if(mp_obj_get_float(item)) {\n",
    "                *array = 1;\n",
    "            }\n",
    "            array++;\n",
    "        }\n",
    "    } else {\n",
    "        while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {\n",
    "            mp_binary_set_val_array(dtype, self->array->items, i++, item);\n",
    "        }\n",
    "    }\n",
    "    return MP_OBJ_FROM_PTR(self);\n",
    "}\n",
    "\n",
    "mp_bound_slice_t generate_slice(mp_uint_t n, mp_obj_t index) {\n",
    "    // micropython seems to have difficulties with negative steps\n",
    "    mp_bound_slice_t slice;\n",
    "    if(MP_OBJ_IS_TYPE(index, &mp_type_slice)) {\n",
    "        mp_seq_get_fast_slice_indexes(n, index, &slice);\n",
    "    } else if(mp_obj_is_int(index)) {\n",
    "        int32_t _index = mp_obj_get_int(index);\n",
    "        if(_index < 0) {\n",
    "            _index += n;\n",
    "        } \n",
    "        if((_index >= n) || (_index < 0)) {\n",
    "            mp_raise_msg(&mp_type_IndexError, \"index is out of bounds\");\n",
    "        }\n",
    "        slice.start = _index;\n",
    "        slice.stop = _index + 1;\n",
    "        slice.step = 1;\n",
    "    } else {\n",
    "        mp_raise_msg(&mp_type_IndexError, \"indices must be integers, slices, or Boolean lists\");\n",
    "    }\n",
    "    return slice;\n",
    "}\n",
    "\n",
    "size_t slice_length(mp_bound_slice_t slice) {\n",
    "    int32_t len, correction = 1;\n",
    "    if(slice.step > 0) correction = -1;\n",
    "    len = (slice.stop - slice.start + (slice.step + correction)) / slice.step;\n",
    "    if(len < 0) return 0;\n",
    "    return (size_t)len;\n",
    "}\n",
    "\n",
    "ndarray_obj_t *ndarray_new_view_from_tuple(ndarray_obj_t *ndarray, mp_obj_tuple_t *slices) {\n",
    "    mp_bound_slice_t slice;\n",
    "    size_t *shape_array = m_new(size_t, ndarray->ndim);\n",
    "    int32_t *strides_array = m_new(int32_t, ndarray->ndim);\n",
    "    size_t offset = ndarray->offset;\n",
    "    for(uint8_t i=0; i < ndarray->ndim; i++) {\n",
    "        if(i < slices->len) {\n",
    "            slice = generate_slice(ndarray->shape[i], slices->items[i]);\n",
    "            offset += ndarray->offset + slice.start * ndarray->strides[i];\n",
    "            shape_array[i] = slice_length(slice);\n",
    "            strides_array[i] = ndarray->strides[i] * slice.step;\n",
    "        } else {\n",
    "            shape_array[i] = ndarray->shape[i];\n",
    "            strides_array[i] = ndarray->strides[i]; \n",
    "        }\n",
    "    }\n",
    "    return ndarray_new_view(ndarray->array, ndarray->ndim, shape_array, strides_array, offset, ndarray->boolean);\n",
    "}\n",
    "    \n",
    "bool ndarray_check_compatibility(ndarray_obj_t *lhs, ndarray_obj_t *rhs) {\n",
    "    if(rhs->ndim > lhs->ndim) {\n",
    "        return false;\n",
    "    } \n",
    "    for(uint8_t i=0; i < rhs->ndim; i++) {\n",
    "        if((rhs->shape[rhs->ndim-1-i] != 1) && (rhs->shape[rhs->ndim-1-i] != lhs->shape[lhs->ndim-1-i])) {\n",
    "            return false;\n",
    "        }\n",
    "    }\n",
    "    return true;\n",
    "}\n",
    "\n",
    "mp_obj_t ndarray_assign_view_from_tuple(ndarray_obj_t *ndarray, mp_obj_tuple_t *slices, mp_obj_t value) {\n",
    "    ndarray_obj_t *lhs = ndarray_new_view_from_tuple(ndarray, slices);\n",
    "    ndarray_obj_t *rhs;\n",
    "    if(MP_OBJ_IS_TYPE(value, &ulab_ndarray_type)) {\n",
    "        rhs = MP_OBJ_TO_PTR(value);\n",
    "        // since this is an assignment, the left hand side should definitely be able to contain the right hand side\n",
    "        if(!ndarray_check_compatibility(lhs, rhs)) {\n",
    "            mp_raise_ValueError(\"could not broadcast input array into output array\");\n",
    "        }\n",
    "    } else { // we have a scalar, so create an ndarray for it\n",
    "        size_t *shape = m_new(size_t, lhs->ndim*sizeof(size_t));\n",
    "        for(uint8_t i=0; i < lhs->ndim; i++) {\n",
    "            shape[i] = 1;\n",
    "        }\n",
    "        rhs = ndarray_new_dense_ndarray(lhs->ndim, shape, lhs->array->typecode);\n",
    "        mp_binary_set_val_array(rhs->array->typecode, rhs->array->items, 0, value);\n",
    "    }\n",
    "    size_t roffset = rhs->offset;\n",
    "    size_t loffset = lhs->offset;\n",
    "    size_t *lcoords = ndarray_new_coords(lhs->ndim);\n",
    "    mp_obj_t item;\n",
    "    uint8_t diff_ndim = lhs->ndim - rhs->ndim;\n",
    "    for(size_t i=0; i < lhs->len; i++) {\n",
    "        item = mp_binary_get_val_array(rhs->array->typecode, rhs->array->items, roffset);\n",
    "        mp_binary_set_val_array(lhs->array->typecode, lhs->array->items, loffset, item);\n",
    "        for(uint8_t j=lhs->ndim-1; j > 0; j--) {\n",
    "            loffset += lhs->strides[j];\n",
    "            lcoords[j] += 1;\n",
    "            if(j >= diff_ndim) {\n",
    "                if(rhs->shape[j-diff_ndim] != 1) {\n",
    "                    roffset += rhs->strides[j-diff_ndim];\n",
    "                }\n",
    "            }\n",
    "            if(lcoords[j] == lhs->shape[j]) { // we are at a dimension boundary\n",
    "                if(j > diff_ndim) { // this means right-hand-side coordinates that haven't been prepended\n",
    "                    if(rhs->shape[j-diff_ndim] != 1) {\n",
    "                        // if rhs->shape[j-diff_ndim] != 1, then rhs->shape[j-diff_ndim] == lhs->shape[j],\n",
    "                        // so we can advance the offset counter\n",
    "                        roffset -= rhs->shape[j-diff_ndim] * rhs->strides[j-diff_ndim];\n",
    "                        roffset += rhs->strides[j-diff_ndim-1];\n",
    "                    } else { // rhs->shape[j-diff_ndim] == 1\n",
    "                        roffset -= rhs->strides[j-diff_ndim];\n",
    "                    }\n",
    "                } else {\n",
    "                    roffset = rhs->offset;\n",
    "                }\n",
    "                loffset -= lhs->shape[j] * lhs->strides[j];\n",
    "                loffset += lhs->strides[j-1];\n",
    "                lcoords[j] = 0;\n",
    "                lcoords[j-1] += 1;\n",
    "            } else { // coordinates can change only, if the last coordinate changes\n",
    "                break;\n",
    "            }\n",
    "        }\n",
    "    }\n",
    "    m_del(size_t, lcoords, lhs->ndim);\n",
    "    return mp_const_none;\n",
    "}\n",
    "\n",
    "mp_obj_t ndarray_subscr(mp_obj_t self, mp_obj_t index, mp_obj_t value) {\n",
    "    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(self);\n",
    "    \n",
    "    if (value == MP_OBJ_SENTINEL) { // return value(s)\n",
    "        if(mp_obj_is_int(index) || MP_OBJ_IS_TYPE(index, &mp_type_slice)) {\n",
    "            mp_obj_t *items = m_new(mp_obj_t, 1);\n",
    "            items[0] = index;\n",
    "            mp_obj_t tuple = mp_obj_new_tuple(1, items);\n",
    "            return ndarray_new_view_from_tuple(ndarray, tuple);\n",
    "        }\n",
    "        // first, check, whether all members of the tuple are integer scalars, or slices\n",
    "        if(MP_OBJ_IS_TYPE(index, &mp_type_tuple)) {\n",
    "            mp_obj_tuple_t *tuple = MP_OBJ_TO_PTR(index);\n",
    "            for(uint8_t i=0; i < tuple->len; i++) {\n",
    "                if(!MP_OBJ_IS_TYPE(tuple->items[i], &mp_type_slice) && !mp_obj_is_int(tuple->items[i])) {\n",
    "                    // TODO: we have to return a copy here\n",
    "                    mp_raise_msg(&mp_type_IndexError, \"wrong index type\");\n",
    "                }\n",
    "            }\n",
    "            // now we know that we can return a view\n",
    "            ndarray_obj_t *result = ndarray_new_view_from_tuple(ndarray, tuple);\n",
    "            return MP_OBJ_FROM_PTR(result);\n",
    "        }\n",
    "    } else { // assignment; the value must be an ndarray, or a scalar\n",
    "        if(!MP_OBJ_IS_TYPE(value, &ulab_ndarray_type) && !mp_obj_is_int(value) && !mp_obj_is_float(value)) {\n",
    "            mp_raise_ValueError(\"right hand side must be an ndarray, or a scalar\");\n",
    "        } else {\n",
    "            if(mp_obj_is_int(index) || MP_OBJ_IS_TYPE(index, &mp_type_slice)) {\n",
    "                mp_obj_t *items = m_new(mp_obj_t, 1);\n",
    "                items[0] = index;\n",
    "                mp_obj_t tuple = mp_obj_new_tuple(1, items);\n",
    "                return ndarray_assign_view_from_tuple(ndarray, tuple, value);\n",
    "            } \n",
    "            if(MP_OBJ_IS_TYPE(index, &mp_type_tuple)) {\n",
    "                mp_obj_tuple_t *tuple = MP_OBJ_TO_PTR(index);\n",
    "                for(uint8_t i=0; i < tuple->len; i++) {\n",
    "                    if(!MP_OBJ_IS_TYPE(tuple->items[i], &mp_type_slice) && !mp_obj_is_int(tuple->items[i])) {\n",
    "                        // TODO: we have to return a copy here\n",
    "                        mp_raise_msg(&mp_type_IndexError, \"wrong index type\");\n",
    "                    }\n",
    "                }\n",
    "                return ndarray_assign_view_from_tuple(ndarray, tuple, value);\n",
    "            } else {\n",
    "                mp_raise_NotImplementedError(\"wrong index type\");\n",
    "            }\n",
    "        }\n",
    "    }\n",
    "    return mp_const_none;\n",
    "}\n",
    "\n",
    "// itarray iterator\n",
    "mp_obj_t ndarray_getiter(mp_obj_t o_in, mp_obj_iter_buf_t *iter_buf) {\n",
    "    return mp_obj_new_ndarray_iterator(o_in, 0, iter_buf);\n",
    "}\n",
    "\n",
    "typedef struct _mp_obj_ndarray_it_t {\n",
    "    mp_obj_base_t base;\n",
    "    mp_fun_1_t iternext;\n",
    "    mp_obj_t ndarray;\n",
    "    size_t cur;\n",
    "} mp_obj_ndarray_it_t;\n",
    "\n",
    "mp_obj_t ndarray_iternext(mp_obj_t self_in) {\n",
    "    mp_obj_ndarray_it_t *self = MP_OBJ_TO_PTR(self_in);\n",
    "    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(self->ndarray);\n",
    "    size_t iter_end = ndarray->shape[0];\n",
    "    if(self->cur < iter_end) {\n",
    "        if(ndarray->ndim == 1) { // we have a linear array\n",
    "            // read the current value\n",
    "            self->cur++;\n",
    "            return mp_binary_get_val_array(ndarray->array->typecode, ndarray->array->items, self->cur-1);\n",
    "        } else { // we have a tensor, return the reduced view\n",
    "            size_t offset = ndarray->offset + self->cur * ndarray->strides[0];\n",
    "            ndarray_obj_t *value = ndarray_new_view(ndarray->array, ndarray->ndim-1, ndarray->shape+1, ndarray->strides+1, offset, ndarray->boolean);\n",
    "            self->cur++;\n",
    "            return MP_OBJ_FROM_PTR(value);\n",
    "        }\n",
    "    } else {\n",
    "        return MP_OBJ_STOP_ITERATION;\n",
    "    }\n",
    "}\n",
    "\n",
    "mp_obj_t mp_obj_new_ndarray_iterator(mp_obj_t ndarray, size_t cur, mp_obj_iter_buf_t *iter_buf) {\n",
    "    assert(sizeof(mp_obj_ndarray_it_t) <= sizeof(mp_obj_iter_buf_t));\n",
    "    mp_obj_ndarray_it_t *o = (mp_obj_ndarray_it_t*)iter_buf;\n",
    "    o->base.type = &mp_type_polymorph_iter;\n",
    "    o->iternext = ndarray_iternext;\n",
    "    o->ndarray = ndarray;\n",
    "    o->cur = cur;\n",
    "    return MP_OBJ_FROM_PTR(o);\n",
    "}\n",
    "\n",
    "mp_obj_t ndarray_shape(mp_obj_t self_in) {\n",
    "    ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);\n",
    "    mp_obj_t *items = m_new(mp_obj_t, self->ndim);\n",
    "    size_t *shape = (size_t *)self->shape;\n",
    "    for(uint8_t i=0; i < self->ndim; i++) {\n",
    "        items[i] = mp_obj_new_int(shape[i]);\n",
    "    }\n",
    "    mp_obj_t tuple = mp_obj_new_tuple(self->ndim, items);\n",
    "    m_del(mp_obj_t, items, self->ndim);\n",
    "    return tuple;\n",
    "}\n",
    "\n",
    "mp_obj_t ndarray_strides(mp_obj_t self_in) {\n",
    "    ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);\n",
    "    mp_obj_t *items = m_new(mp_obj_t, self->ndim);\n",
    "    int32_t *strides = (int32_t *)self->strides;\n",
    "    for(int8_t i=0; i < self->ndim; i++) {\n",
    "        items[i] = mp_obj_new_int(strides[i]);\n",
    "    }\n",
    "    mp_obj_t tuple = mp_obj_new_tuple(self->ndim, items);\n",
    "    m_del(mp_obj_t, items, self->ndim);\n",
    "    return tuple;\n",
    "}\n",
    "\n",
    "mp_obj_t ndarray_info(mp_obj_t self_in) {\n",
    "    // TODO: the proper way of handling this would be to use mp_print_str()\n",
    "    ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);\n",
    "    printf(\"class: ndarray\\n\");\n",
    "    printf(\"shape: (%ld\", (size_t)self->shape[0]);\n",
    "    for(uint8_t i=1; i < self->ndim; i++) {\n",
    "        printf(\", %ld\", (size_t)self->shape[i]);\n",
    "    }\n",
    "    printf(\")\");\n",
    "    printf(\"\\nstrides: (%ld\", (size_t)self->strides[0]);\n",
    "    for(uint8_t i=1; i < self->ndim; i++) {\n",
    "        printf(\", %ld\", (size_t)self->strides[i]);\n",
    "    }\n",
    "    printf(\")\");\n",
    "    printf(\"\\nitemsize: %ld\\n\", mp_binary_get_size('@', self->array->typecode, NULL));\n",
    "    printf(\"data pointer: %p\\n\", self->array->items);\n",
    "    if(self->array->typecode == NDARRAY_BOOL) {\n",
    "        printf(\"type: bool\\n\");\n",
    "    } else if(self->array->typecode == NDARRAY_UINT8) {\n",
    "        printf(\"type: uint8\\n\");\n",
    "    } else if(self->array->typecode == NDARRAY_INT8) {\n",
    "        printf(\"type: int8\\n\");\n",
    "    } else if(self->array->typecode == NDARRAY_UINT16) {\n",
    "        printf(\"type: uint16\\n\");\n",
    "    } else if(self->array->typecode == NDARRAY_INT16) {\n",
    "        printf(\"type: int16\\n\");\n",
    "    } else {\n",
    "        printf(\"type: float\\n\");\n",
    "    }\n",
    "    return mp_const_none;\n",
    "}\n",
    "\n",
    "mp_obj_t ndarray_flatten(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {\n",
    "    static const mp_arg_t allowed_args[] = {\n",
    "        { MP_QSTR_order, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_QSTR(MP_QSTR_C)} },\n",
    "    };\n",
    "\n",
    "    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];\n",
    "    mp_arg_parse_all(n_args - 1, pos_args + 1, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);\n",
    "    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(pos_args[0]);\n",
    "    \n",
    "    GET_STR_DATA_LEN(args[0].u_obj, order, clen);\n",
    "    if((clen != 1) || ((memcmp(order, \"C\", 1) != 0) && (memcmp(order, \"F\", 1) != 0))) {\n",
    "        mp_raise_ValueError(\"flattening order must be either 'C', or 'F'\");        \n",
    "    }\n",
    "    ndarray_obj_t *result = ndarray_new_linear_array(ndarray->len, ndarray->array->typecode);\n",
    "    uint8_t itemsize = mp_binary_get_size('@', ndarray->array->typecode, NULL);\n",
    "    if(ndarray->len == ndarray->array->len) { // this is a dense array, we can simply copy everything\n",
    "        if(memcmp(order, \"C\", 1) == 0) { // C order; this should be fast, because no re-ordering is required\n",
    "            memcpy(result->array->items, ndarray->array->items, itemsize*ndarray->len);\n",
    "        } else { // Fortran order\n",
    "            mp_raise_NotImplementedError(\"flatten is implemented in C order only\");\n",
    "        }\n",
    "        return MP_OBJ_FROM_PTR(result);\n",
    "    } else {\n",
    "        uint8_t *rarray = (uint8_t *)result->array->items;\n",
    "        uint8_t *narray = (uint8_t *)ndarray->array->items;\n",
    "        size_t *coords = ndarray_new_coords(ndarray->ndim);\n",
    "\n",
    "        size_t offset = ndarray->offset;\n",
    "        if(memcmp(order, \"C\", 1) == 0) { // C order; this is a view, so we have to collect the items\n",
    "            for(size_t i=0; i < result->len; i++) {\n",
    "                memcpy(rarray, &narray[offset*itemsize], itemsize);\n",
    "                rarray += itemsize;\n",
    "                offset += ndarray->strides[ndarray->ndim-1];\n",
    "                coords[ndarray->ndim-1] += 1;\n",
    "                for(uint8_t j=ndarray->ndim-1; j > 0; j--) {\n",
    "                    if(coords[j] == ndarray->shape[j]) {\n",
    "                        offset -= ndarray->shape[j] * ndarray->strides[j];\n",
    "                        offset += ndarray->strides[j-1];\n",
    "                        coords[j] = 0;\n",
    "                        coords[j-1] += 1;\n",
    "                    } else { // coordinates can change only, if the last coordinate changes\n",
    "                        break;\n",
    "                    }\n",
    "                }\n",
    "            }\n",
    "            m_del(size_t, coords, ndarray->ndim);\n",
    "        } else { // Fortran order\n",
    "            mp_raise_NotImplementedError(\"flatten is implemented for C order only\");\n",
    "        }\n",
    "        //m_del(int32_t, shape_strides, 1);\n",
    "        return MP_OBJ_FROM_PTR(result);\n",
    "    }\n",
    "}\n",
    "\n",
    "mp_obj_t ndarray_reshape(mp_obj_t oin, mp_obj_t new_shape) {\n",
    "    // TODO: not all reshaping operations can be realised via views!\n",
    "    // returns a new view with the specified shape\n",
    "    ndarray_obj_t *ndarray_in = MP_OBJ_TO_PTR(oin);\n",
    "    mp_obj_tuple_t *shape = MP_OBJ_TO_PTR(new_shape);\n",
    "    if(ndarray_in->len != ndarray_in->array->len) {\n",
    "        mp_raise_ValueError(\"input and output shapes are not compatible\");\n",
    "    }\n",
    "    size_t *shape_array = m_new(size_t, shape->len);\n",
    "    int32_t *strides_array = m_new(int32_t, shape->len);\n",
    "    size_t new_offset = ndarray_in->offset; // this has to be re-calculated\n",
    "    strides_array[shape->len-1] = 1;\n",
    "    shape_array[shape->len-1] = mp_obj_get_int(shape->items[shape->len-1]);\n",
    "    for(uint8_t i=shape->len-1; i > 0; i--) {\n",
    "        shape_array[i-1] = mp_obj_get_int(shape->items[i-1]);\n",
    "        strides_array[i-1] = strides_array[i] * shape_array[i];\n",
    "    }\n",
    "    ndarray_obj_t *ndarray = ndarray_new_view(ndarray_in->array, shape->len, shape_array, strides_array, new_offset, ndarray_in->boolean);\n",
    "    return MP_OBJ_FROM_PTR(ndarray);\n",
    "}\n",
    "\n",
    "mp_obj_t ndarray_transpose(mp_obj_t self_in) {\n",
    "    // TODO: check, what happens to the offset here!\n",
    "    ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);\n",
    "    size_t *shape = m_new(size_t, self->ndim);\n",
    "    int32_t *strides = m_new(int32_t, self->ndim);\n",
    "    for(uint8_t i=0; i < self->ndim; i++) {\n",
    "        shape[i] = self->shape[self->ndim-1-i];\n",
    "        strides[i] = self->strides[self->ndim-1-i];\n",
    "    }\n",
    "    ndarray_obj_t *ndarray = ndarray_new_view(self->array, self->ndim, shape, strides, self->offset, self->boolean);\n",
    "    return MP_OBJ_FROM_PTR(ndarray);\n",
    "}\n",
    "\n",
    "mp_obj_t ndarray_itemsize(mp_obj_t self_in) {\n",
    "    ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);\n",
    "    return mp_obj_new_int(mp_binary_get_size('@', self->array->typecode, NULL));\n",
    "}\n",
    "\n",
    "uint8_t upcasting(ndarray_obj_t *in1, ndarray_obj_t *in2) {\n",
    "    if((in1->array->typecode = NDARRAY_FLOAT) || (in2->array->typecode = NDARRAY_FLOAT)) { // 9 cases\n",
    "        return NDARRAY_FLOAT;\n",
    "    } else if(in1->array->typecode == in2->array->typecode) { // 4 cases\n",
    "        return in1->array->typecode;\n",
    "    } else if(in1->array->typecode == NDARRAY_UINT8) { // 3 cases\n",
    "        if(in2->array->typecode == NDARRAY_UINT16) return NDARRAY_UINT16;\n",
    "        else return NDARRAY_INT16; // in2->array->typecode == NDARRAY_INT8, NDARRAY_INT16, \n",
    "    } else if(in1->array->typecode == NDARRAY_INT8) { // 3 cases\n",
    "        return NDARRAY_INT16; // in2->array->typecode == NDARRAY_UINT8, NDARRAY_UINT16, NDARRAY_INT16\n",
    "    } else if(in1->array->typecode == NDARRAY_UINT16) { // 3 cases\n",
    "        if((in2->array->typecode == NDARRAY_UINT8) || (in2->array->typecode == NDARRAY_INT8)) return NDARRAY_UINT16;\n",
    "        return NDARRAY_FLOAT; // in2->array->typecode == NDARRAY_INT16\n",
    "    } else if(in1->array->typecode == NDARRAY_INT16) { // 3 cases\n",
    "        if((in2->array->typecode == NDARRAY_UINT8) || (in2->array->typecode == NDARRAY_INT8)) return NDARRAY_INT16;\n",
    "        return NDARRAY_FLOAT; // in2->array->typecode == NDARRAY_UINT16\n",
    "    }\n",
    "}\n",
    "\n",
    "size_t *broadcasting(ndarray_obj_t *in1, ndarray_obj_t *in2) {\n",
    "    // creates a new dense array with the dimensions\n",
    "    // and shapes dictated by the broadcasting rules\n",
    "    uint8_t ndim = in2->ndim;\n",
    "    if(in1->ndim > in2->ndim) { \n",
    "        // overwrite ndim, if the first array is larger than the second\n",
    "        ndim = in1->ndim;\n",
    "    }\n",
    "    size_t *shape = m_new(size_t, ndim);\n",
    "    size_t *shape1 = m_new(size_t, ndim);\n",
    "    size_t *shape2 = m_new(size_t, ndim);\n",
    "    for(uint8_t i=0; i < ndim; i++) {\n",
    "        // initialise the shapes with straight ones\n",
    "        shape1[i] = shape2[i] = 1;\n",
    "        shape[i] = 0;\n",
    "    }\n",
    "    // overwrite the first in1->ndim (in2->ndim) shape values from the right\n",
    "    for(uint8_t i=ndim; i > ndim-in1->ndim; i--) {\n",
    "        shape1[i-1] = in1->shape[i-1];\n",
    "    }\n",
    "    for(uint8_t i=ndim; i > ndim-in2->ndim; i--) {\n",
    "        shape2[i-1] = in2->shape[i-1];\n",
    "    }\n",
    "    // check, whether the new shapes conform to the broadcasting rules\n",
    "    for(uint8_t i=0; i < ndim; i++) {\n",
    "        if((shape1[i] == shape2[i]) || (shape1[i] == 1) || (shape2[i] == 1)) {\n",
    "            shape[i] = shape1[i];\n",
    "            if(shape1[i] < shape2[i]) {\n",
    "                shape[i] = shape2[i];\n",
    "            }\n",
    "        } else {\n",
    "            m_del(size_t, shape1, ndim);\n",
    "            m_del(size_t, shape2, ndim);\n",
    "            break;\n",
    "        }\n",
    "    }\n",
    "    m_del(size_t, shape1, ndim);\n",
    "    m_del(size_t, shape2, ndim);\n",
    "    return shape;\n",
    "}\n",
    "\n",
    "ndarray_obj_t *ndarray_do_binary_op(ndarray_obj_t *lhs, ndarray_obj_t *rhs, uint8_t op) {\n",
    "    uint8_t typecode = upcasting(lhs, rhs);\n",
    "    size_t *shape = broadcasting(lhs, rhs);\n",
    "    uint8_t ndim = lhs->ndim;\n",
    "    if(rhs->ndim > lhs->ndim) ndim = rhs->ndim;\n",
    "    // this is going to be the result array\n",
    "    ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(ndim, shape, typecode);\n",
    "\n",
    "    size_t offset = ndarray->offset;\n",
    "    size_t roffset = rhs->offset;\n",
    "    size_t loffset = lhs->offset;\n",
    "    size_t *coords = ndarray_new_coords(ndim); // coordinates in ndarray\n",
    "    size_t *lcoords = ndarray_new_coords(ndim); // coordinates in the left-hand-side ndarray\n",
    "    size_t *rcoords = ndarray_new_coords(ndim); // coordinates in the right-hand-side ndarray\n",
    "    int32_t *lstride = m_new(int32_t, ndim);\n",
    "    int32_t *rstride = m_new(int32_t, ndim);\n",
    "    for(uint8_t i=0; i < lhs->ndim; i++) {\n",
    "        lstride[ndim-1-i] = lhs->strides[lhs->ndim-1-i];\n",
    "    }\n",
    "    for(uint8_t i=lhs->ndim; i < ndim; i++) {\n",
    "        lstride[ndim-1-i] = lhs->strides[0];        \n",
    "    }\n",
    "    for(uint8_t i=0; i < rhs->ndim; i++) {\n",
    "        rstride[ndim-1-i] = rhs->strides[rhs->ndim-1-i];\n",
    "    }\n",
    "    for(uint8_t i=rhs->ndim; i < ndim; i++) {\n",
    "        rstride[ndim-1-i] = rhs->strides[0];        \n",
    "    }\n",
    "    \n",
    "    mp_float_t lvalue, rvalue, result = 0.0;\n",
    "    int32_t ilvalue, irvalue, iresult = 0;\n",
    "    uint8_t float_size = sizeof(mp_float_t);\n",
    "    void *narray = ndarray->array->items;\n",
    "    uint8_t *larray = (uint8_t *)lhs->array->items;\n",
    "    uint8_t *rarray = (uint8_t *)rhs->array->items;\n",
    "    for(size_t i=0; i < ndarray->len; i++) {\n",
    "        // we could make this prettier by moving everything into a function,\n",
    "        // but the function call might be too expensive, especially that it is in the loop...\n",
    "        // left hand side\n",
    "        if(lhs->array->typecode == NDARRAY_UINT8) {\n",
    "            ilvalue = (int32_t)((uint8_t *)larray)[loffset];\n",
    "        } else if(lhs->array->typecode == NDARRAY_INT8) {\n",
    "            ilvalue = (int32_t)((int8_t *)larray)[loffset];\n",
    "        } else if(lhs->array->typecode == NDARRAY_UINT16) {\n",
    "            ilvalue = (int32_t)((uint16_t *)larray)[loffset*2];\n",
    "        } else if(lhs->array->typecode == NDARRAY_INT16) {\n",
    "            ilvalue = (int32_t)((int16_t *)larray)[loffset*2];\n",
    "        } else {\n",
    "            lvalue = (mp_float_t)((mp_float_t *)larray)[loffset*float_size];\n",
    "        }\n",
    "        // right hand side\n",
    "        if(rhs->array->typecode == NDARRAY_UINT8) {\n",
    "            irvalue = (int32_t)((uint8_t *)rarray)[roffset];\n",
    "        } else if(rhs->array->typecode == NDARRAY_INT8) {\n",
    "            irvalue = (int32_t)((int8_t *)rarray)[roffset];\n",
    "        } else if(rhs->array->typecode == NDARRAY_UINT16) {\n",
    "            irvalue = (int32_t)((uint16_t *)rarray)[roffset*2];\n",
    "        } else if(lhs->array->typecode == NDARRAY_INT16) {\n",
    "            irvalue = (int32_t)((int16_t *)rarray)[roffset*2];\n",
    "        } else {\n",
    "            rvalue = (mp_float_t)((mp_float_t *)rarray)[roffset*float_size];\n",
    "        }\n",
    "        // this is the place, where the actual operations take place\n",
    "        if(op == MP_BINARY_OP_ADD) {\n",
    "            result = lvalue + rvalue;\n",
    "        } else if(op == MP_BINARY_OP_SUBTRACT) {\n",
    "            result = lvalue - rvalue;\n",
    "        } else if(op == MP_BINARY_OP_MULTIPLY) {\n",
    "            result = lvalue * rvalue;\n",
    "        }\n",
    "        // cast the result to the proper output type\n",
    "        if(typecode == NDARRAY_UINT8) {\n",
    "            ((uint8_t *)narray)[offset] = (uint8_t)result;\n",
    "        } else if(typecode == NDARRAY_INT8) {\n",
    "            ((int8_t *)narray)[offset] = (int8_t)result;\n",
    "        } else if(typecode == NDARRAY_UINT16) {\n",
    "            ((uint16_t *)narray)[offset] = (uint16_t)result;\n",
    "        } else if(typecode == NDARRAY_INT16) {\n",
    "            ((int16_t *)narray)[offset] = (int16_t)result;\n",
    "        } else {\n",
    "            ((mp_float_t *)narray)[offset] = result;\n",
    "        }\n",
    "        \n",
    "        \n",
    "        offset += ndarray->strides[ndim-1];\n",
    "        coords[ndim-1] += 1;\n",
    "        for(uint8_t j=ndim-1; j > 0; j--) {\n",
    "            if(coords[j] == ndarray->shape[j]) {\n",
    "                offset -= ndarray->shape[j] * ndarray->strides[j];\n",
    "                offset += ndarray->strides[j-1];\n",
    "                coords[j] = 0;\n",
    "                coords[j-1] += 1;\n",
    "            } else { // coordinates can change only, if the last coordinate changes\n",
    "                break;\n",
    "            }        \n",
    "        }   \n",
    "    }\n",
    "    m_del(size_t, coords, ndim);\n",
    "    m_del(size_t, lcoords, ndim);\n",
    "    m_del(size_t, rcoords, ndim);\n",
    "    m_del(int32_t, lstride, ndim);\n",
    "    m_del(int32_t, rstride, ndim);    \n",
    "    return ndarray;\n",
    "}\n",
    "\n",
    "// Binary operations\n",
    "mp_obj_t ndarray_binary_op(mp_binary_op_t op, mp_obj_t LHS, mp_obj_t RHS) {\n",
    "    // First, handle the case, when the operand on the right hand side is a scalar\n",
    "    ndarray_obj_t *lhs = MP_OBJ_TO_PTR(LHS);\n",
    "    ndarray_obj_t *rhs;\n",
    "    if(mp_obj_is_int(RHS) || mp_obj_is_float(RHS)) {\n",
    "        size_t *shape = m_new(size_t, lhs->ndim*sizeof(size_t));\n",
    "        for(uint8_t i=0; i < lhs->ndim; i++) {\n",
    "            shape[i] = 1;\n",
    "        }\n",
    "        if(mp_obj_is_int(RHS)) {\n",
    "            int32_t ivalue = mp_obj_get_int(RHS);\n",
    "            if((ivalue > 0) && (ivalue < 256)) {\n",
    "                rhs = ndarray_new_dense_ndarray(lhs->ndim, shape, NDARRAY_UINT8);\n",
    "            } else if((ivalue > 255) && (ivalue < 65535)) {\n",
    "                rhs = ndarray_new_dense_ndarray(lhs->ndim, shape, NDARRAY_UINT16);\n",
    "            } else if((ivalue < 0) && (ivalue > -128)) {\n",
    "                rhs = ndarray_new_dense_ndarray(lhs->ndim, shape, NDARRAY_INT8);\n",
    "            } else if((ivalue < -127) && (ivalue > -32767)) {\n",
    "                rhs = ndarray_new_dense_ndarray(lhs->ndim, shape, NDARRAY_INT16);\n",
    "            } else { // the integer value clearly does not fit the ulab types, so move on to float\n",
    "                rhs = ndarray_new_dense_ndarray(lhs->ndim, shape, NDARRAY_FLOAT);\n",
    "            }\n",
    "            mp_binary_set_val_array(rhs->array->typecode, rhs->array->items, 0, RHS);\n",
    "        } else { // we have a float\n",
    "            rhs = ndarray_new_dense_ndarray(lhs->ndim, shape, NDARRAY_FLOAT);\n",
    "            mp_binary_set_val_array(rhs->array->typecode, rhs->array->items, 0, RHS);\n",
    "        }\n",
    "    } else {\n",
    "        // the right hand side is an ndarray\n",
    "        rhs = MP_OBJ_TO_PTR(RHS);\n",
    "    }\n",
    "    \n",
    "    ndarray_obj_t *ndarray = NULL;\n",
    "    size_t *new_shape = broadcasting(lhs, rhs);\n",
    "    \n",
    "    switch(op) {\n",
    "        case MP_BINARY_OP_MAT_MULTIPLY:\n",
    "            break;\n",
    "        \n",
    "        case MP_BINARY_OP_ADD:\n",
    "            if(new_shape[0] == 0) {\n",
    "                mp_raise_ValueError(\"operands could not be cast together\");\n",
    "            }\n",
    "            ndarray = ndarray_do_binary_op(lhs, rhs, op);\n",
    "            // The parameters of RUN_BINARY_LOOP are \n",
    "            // typecode of type_out, type_left, type_right, out_array, lhs_array, rhs_array, shape, ndim, operator\n",
    "            //RUN_BINARY_LOOP(ndarray, NDARRAY_FLOAT, mp_float_t, mp_float_t, mp_float_t, lhs, rhs, new_shape, ndim, operator);\n",
    "            /* if(lhs->array->typecode == NDARRAY_UINT8) {\n",
    "                if(rhs->array->typecode == NDARRAY_UINT8) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_UINT8, uint8_t, uint8_t, uint8_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_INT8) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_INT16, int16_t, uint8_t, int8_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_UINT16) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_UINT16, uint16_t, uint8_t, uint16_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_INT16) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_INT16, int16_t, uint8_t, int16_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_FLOAT) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_FLOAT, mp_float_t, uint8_t, mp_float_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                }\n",
    "            } else if(lhs->array->typecode == NDARRAY_INT8) {\n",
    "                if(rhs->array->typecode == NDARRAY_UINT8) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_INT16, int16_t, int8_t, uint8_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_INT8) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_INT8, int8_t, int8_t, int8_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_UINT16) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_INT16, int16_t, int8_t, uint16_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_INT16) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_INT16, int16_t, int8_t, int16_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_FLOAT) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_FLOAT, mp_float_t, int8_t, mp_float_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                }                \n",
    "            } else if(lhs->array->typecode == NDARRAY_UINT16) {\n",
    "                if(rhs->array->typecode == NDARRAY_UINT8) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_UINT16, uint16_t, uint16_t, uint8_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_INT8) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_UINT16, uint16_t, uint16_t, int8_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_UINT16) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_UINT16, uint16_t, uint16_t, uint16_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_INT16) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_FLOAT, mp_float_t, uint16_t, int16_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_FLOAT) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_FLOAT, mp_float_t, uint8_t, mp_float_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                }\n",
    "            } else if(lhs->array->typecode == NDARRAY_INT16) {\n",
    "                if(rhs->array->typecode == NDARRAY_UINT8) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_INT16, int16_t, int16_t, uint8_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_INT8) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_INT16, int16_t, int16_t, int8_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_UINT16) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_FLOAT, mp_float_t, int16_t, uint16_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_INT16) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_INT16, int16_t, int16_t, int16_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_FLOAT) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_FLOAT, mp_float_t, uint16_t, mp_float_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                }\n",
    "            } else if(lhs->array->typecode == NDARRAY_FLOAT) {\n",
    "                if(rhs->array->typecode == NDARRAY_UINT8) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_FLOAT, mp_float_t, mp_float_t, uint8_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_INT8) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_FLOAT, mp_float_t, mp_float_t, int8_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_UINT16) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_FLOAT, mp_float_t, mp_float_t, uint16_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_INT16) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_FLOAT, mp_float_t, mp_float_t, int16_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } else if(rhs->array->typecode == NDARRAY_FLOAT) {\n",
    "                    RUN_BINARY_LOOP(NDARRAY_FLOAT, mp_float_t, mp_float_t, mp_float_t, lhs, rhs, new_shape, ndim, operator);\n",
    "                } \n",
    "            }  else { // this should never happen\n",
    "                mp_raise_TypeError(\"wrong input type\");\n",
    "            } */\n",
    "        break;\n",
    "        default:\n",
    "//            m_del(size_t, shape, lhs->ndim*sizeof(size_t));\n",
    "            return mp_const_none;\n",
    "        }\n",
    "    \n",
    "    \n",
    "    return MP_OBJ_FROM_PTR(ndarray);\n",
    "//    return mp_const_none;\n",
    "}\n",
    "\n",
    "mp_obj_t ndarray_unary_op(mp_unary_op_t op, mp_obj_t self_in) {\n",
    "    ndarray_obj_t *self = MP_OBJ_TO_PTR(self_in);\n",
    "    ndarray_obj_t *ndarray;\n",
    "\n",
    "    switch(op) {\n",
    "        case MP_UNARY_OP_LEN:\n",
    "            return mp_obj_new_int(self->shape[0]);\n",
    "            break;\n",
    "\n",
    "        case MP_UNARY_OP_INVERT:\n",
    "            if(self->array->typecode == NDARRAY_FLOAT) {\n",
    "                mp_raise_ValueError(\"operation is not supported for given type\");\n",
    "            }\n",
    "            // we can invert the content byte by byte, there is no need to distinguish between different typecodes\n",
    "            ndarray = ndarray_copy_view(self, self->array->typecode);\n",
    "            uint8_t *array = (uint8_t *)ndarray->array->items;\n",
    "            if(self->boolean == NDARRAY_BOOLEAN) {\n",
    "                for(size_t i=0; i < self->len; i++) array[i] = 1 - array[i];\n",
    "            }\n",
    "            else {\n",
    "                for(size_t i=0; i < self->len; i++) array[i] = ~array[i];\n",
    "            }\n",
    "            return MP_OBJ_FROM_PTR(ndarray);\n",
    "            break;\n",
    "        \n",
    "        case MP_UNARY_OP_NEGATIVE:\n",
    "            if(self->boolean == NDARRAY_BOOLEAN) {\n",
    "                mp_raise_TypeError(\"boolean negative '-' is not supported, use the '~' operator instead\");\n",
    "            }\n",
    "            ndarray = ndarray_copy_view(self, self->array->typecode);\n",
    "            if(self->array->typecode == NDARRAY_UINT8) {\n",
    "                uint8_t *array = (uint8_t *)ndarray->array->items;\n",
    "                for(size_t i=0; i < self->len; i++) array[i] = -array[i];\n",
    "            } else if(self->array->typecode == NDARRAY_INT8) {\n",
    "                int8_t *array = (int8_t *)ndarray->array->items;\n",
    "                for(size_t i=0; i < self->len; i++) array[i] = -array[i];\n",
    "            } else if(self->array->typecode == NDARRAY_UINT16) {\n",
    "                uint16_t *array = (uint16_t *)ndarray->array->items;\n",
    "                for(size_t i=0; i < self->len; i++) array[i] = -array[i];\n",
    "            } else if(self->array->typecode == NDARRAY_INT16) {\n",
    "                int16_t *array = (int16_t *)ndarray->array->items;\n",
    "                for(size_t i=0; i < self->len; i++) array[i] = -array[i];\n",
    "            } else {\n",
    "                mp_float_t *array = (mp_float_t *)ndarray->array->items;\n",
    "                for(size_t i=0; i < self->len; i++) array[i] = -array[i];\n",
    "            }\n",
    "            return MP_OBJ_FROM_PTR(ndarray);\n",
    "            break;\n",
    "\n",
    "        case MP_UNARY_OP_POSITIVE:\n",
    "            return MP_OBJ_FROM_PTR(ndarray_copy_view(self, self->array->typecode));\n",
    "\n",
    "        case MP_UNARY_OP_ABS:\n",
    "            if((self->array->typecode == NDARRAY_UINT8) || (self->array->typecode == NDARRAY_UINT16)) {\n",
    "                return MP_OBJ_FROM_PTR(ndarray_copy_view(self, self->array->typecode));\n",
    "            }\n",
    "            ndarray = ndarray_copy_view(self, self->array->typecode);\n",
    "            if(self->array->typecode == NDARRAY_INT8) {\n",
    "                int8_t *array = (int8_t *)ndarray->array->items;\n",
    "                for(size_t i=0; i < self->len; i++) {\n",
    "                    if(array[i] < 0) array[i] = -array[i];\n",
    "                }\n",
    "            } else if(self->array->typecode == NDARRAY_INT16) {\n",
    "                int16_t *array = (int16_t *)ndarray->array->items;\n",
    "                for(size_t i=0; i < self->len; i++) {\n",
    "                    if(array[i] < 0) array[i] = -array[i];\n",
    "                }\n",
    "            } else {\n",
    "                mp_float_t *array = (mp_float_t *)ndarray->array->items;\n",
    "                for(size_t i=0; i < self->array->len; i++) {\n",
    "                    if(array[i] < 0) array[i] = -array[i];\n",
    "                }\n",
    "            }\n",
    "            return MP_OBJ_FROM_PTR(ndarray);\n",
    "        break;\n",
    "\n",
    "        default: return MP_OBJ_NULL; // operator not supported\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Vectorise"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## vectorise.h"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-03T21:49:16.517419Z",
     "start_time": "2019-12-03T21:49:16.500269Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "written 1470 bytes to vectorise.h\n"
     ]
    }
   ],
   "source": [
    "%%ccode vectorise.h\n",
    "\n",
    "#ifndef _VECTORISE_\n",
    "#define _VECTORISE_\n",
    "\n",
    "#include \"ndarray.h\"\n",
    "\n",
    "mp_obj_t vectorise_acos(mp_obj_t );\n",
    "mp_obj_t vectorise_acosh(mp_obj_t );\n",
    "mp_obj_t vectorise_asin(mp_obj_t );\n",
    "mp_obj_t vectorise_asinh(mp_obj_t );\n",
    "mp_obj_t vectorise_atan(mp_obj_t );\n",
    "mp_obj_t vectorise_atanh(mp_obj_t );\n",
    "mp_obj_t vectorise_ceil(mp_obj_t );\n",
    "mp_obj_t vectorise_cos(mp_obj_t );\n",
    "mp_obj_t vectorise_erf(mp_obj_t );\n",
    "mp_obj_t vectorise_erfc(mp_obj_t );\n",
    "mp_obj_t vectorise_exp(mp_obj_t );\n",
    "mp_obj_t vectorise_expm1(mp_obj_t );\n",
    "mp_obj_t vectorise_floor(mp_obj_t );\n",
    "mp_obj_t vectorise_gamma(mp_obj_t );\n",
    "mp_obj_t vectorise_lgamma(mp_obj_t );\n",
    "mp_obj_t vectorise_log(mp_obj_t );\n",
    "mp_obj_t vectorise_log10(mp_obj_t );\n",
    "mp_obj_t vectorise_log2(mp_obj_t );\n",
    "mp_obj_t vectorise_sin(mp_obj_t );\n",
    "mp_obj_t vectorise_sinh(mp_obj_t );\n",
    "mp_obj_t vectorise_sqrt(mp_obj_t );\n",
    "mp_obj_t vectorise_tan(mp_obj_t );\n",
    "mp_obj_t vectorise_tanh(mp_obj_t );\n",
    "\n",
    "#define ITERATE_VECTOR(type, source, out) do {\\\n",
    "    type *input = (type *)(source)->array->items;\\\n",
    "    for(size_t i=0; i < (source)->array->len; i++) {\\\n",
    "        *(out)++ = f(*input++);\\\n",
    "    }\\\n",
    "} while(0)\n",
    "\n",
    "#define MATH_FUN_1(py_name, c_name) \\\n",
    "    mp_obj_t vectorise_ ## py_name(mp_obj_t x_obj) { \\\n",
    "        return vectorise_generic_vector(x_obj, MICROPY_FLOAT_C_FUN(c_name)); \\\n",
    "    }\n",
    "    \n",
    "#endif"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## vectorise.c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-03T21:46:52.343220Z",
     "start_time": "2019-12-03T21:46:52.339052Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "written 3254 bytes to vectorise.c\n"
     ]
    }
   ],
   "source": [
    "%%ccode vectorise.c\n",
    "\n",
    "#include <math.h>\n",
    "#include <stdio.h>\n",
    "#include <stdlib.h>\n",
    "#include \"py/runtime.h\"\n",
    "#include \"py/binary.h\"\n",
    "#include \"py/obj.h\"\n",
    "#include \"py/objarray.h\"\n",
    "#include \"vectorise.h\"\n",
    "\n",
    "#ifndef MP_PI\n",
    "#define MP_PI MICROPY_FLOAT_CONST(3.14159265358979323846)\n",
    "#endif\n",
    "    \n",
    "mp_obj_t vectorise_generic_vector(mp_obj_t o_in, mp_float_t (*f)(mp_float_t)) {\n",
    "    // Return a single value, if o_in is not iterable\n",
    "    if(mp_obj_is_float(o_in) || mp_obj_is_integer(o_in)) {\n",
    "        return mp_obj_new_float(f(mp_obj_get_float(o_in)));\n",
    "    }\n",
    "    if(MP_OBJ_IS_TYPE(o_in, &ulab_ndarray_type)) {\n",
    "        ndarray_obj_t *source = MP_OBJ_TO_PTR(o_in);\n",
    "        ndarray_obj_t *ndarray;\n",
    "        if(source->len == source->array->len) { // this is a dense array\n",
    "            ndarray = ndarray_new_dense_ndarray(source->ndim, source->shape, NDARRAY_FLOAT);\n",
    "            mp_float_t *array = (mp_float_t *)ndarray->array->items;\n",
    "            if(source->array->typecode == NDARRAY_UINT8) {\n",
    "                ITERATE_VECTOR(uint8_t, source, array);\n",
    "            } else if(source->array->typecode == NDARRAY_INT8) {\n",
    "                ITERATE_VECTOR(int8_t, source, array);\n",
    "            } else if(source->array->typecode == NDARRAY_UINT16) {\n",
    "                ITERATE_VECTOR(uint16_t, source, array);\n",
    "            } else if(source->array->typecode == NDARRAY_INT16) {\n",
    "                ITERATE_VECTOR(int16_t, source, array);\n",
    "            } else {\n",
    "                ITERATE_VECTOR(mp_float_t, source, array);\n",
    "            } \n",
    "        } else {\n",
    "            ndarray = ndarray_copy_view(source, NDARRAY_FLOAT);\n",
    "            mp_float_t *dataout = (mp_float_t *)ndarray->array->items;\n",
    "            ITERATE_VECTOR(mp_float_t, ndarray, dataout);\n",
    "        }\n",
    "        return MP_OBJ_FROM_PTR(ndarray);\n",
    "    } else if(MP_OBJ_IS_TYPE(o_in, &mp_type_tuple) || MP_OBJ_IS_TYPE(o_in, &mp_type_list) || \n",
    "        MP_OBJ_IS_TYPE(o_in, &mp_type_range)) { // i.e., the input is a generic, one-dimensional iterable\n",
    "            mp_obj_array_t *o = MP_OBJ_TO_PTR(o_in);\n",
    "            ndarray_obj_t *out = ndarray_new_linear_array(o->len, NDARRAY_FLOAT);\n",
    "            mp_float_t *dataout = (mp_float_t *)out->array->items;\n",
    "            \n",
    "            mp_obj_iter_buf_t iter_buf;\n",
    "            mp_obj_t item, iterable = mp_getiter(o_in, &iter_buf);\n",
    "            mp_float_t x;\n",
    "            while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {\n",
    "                x = mp_obj_get_float(item);\n",
    "                *dataout++ = f(x);\n",
    "            }\n",
    "        return MP_OBJ_FROM_PTR(out);\n",
    "    }\n",
    "    return mp_const_none;\n",
    "}\n",
    "\n",
    "MATH_FUN_1(acos, acos);\n",
    "MATH_FUN_1(acosh, acosh);\n",
    "MATH_FUN_1(asin, asin);\n",
    "MATH_FUN_1(asinh, asinh);\n",
    "MATH_FUN_1(atan, atan);\n",
    "MATH_FUN_1(atanh, atanh);\n",
    "MATH_FUN_1(ceil, ceil);\n",
    "MATH_FUN_1(cos, cos);\n",
    "MATH_FUN_1(erf, erf);\n",
    "MATH_FUN_1(erfc, erfc);\n",
    "MATH_FUN_1(exp, exp);\n",
    "MATH_FUN_1(expm1, expm1);\n",
    "MATH_FUN_1(floor, floor);\n",
    "MATH_FUN_1(gamma, tgamma);\n",
    "MATH_FUN_1(lgamma, lgamma);\n",
    "MATH_FUN_1(log, log);\n",
    "MATH_FUN_1(log10, log10);\n",
    "MATH_FUN_1(log2, log2);\n",
    "MATH_FUN_1(sin, sin);\n",
    "MATH_FUN_1(sinh, sinh);\n",
    "MATH_FUN_1(sqrt, sqrt);\n",
    "MATH_FUN_1(tan, tan);\n",
    "MATH_FUN_1(tanh, tanh);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Linalg"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## linalg.h"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-04T06:00:11.275084Z",
     "start_time": "2019-12-04T06:00:11.258775Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "written 819 bytes to linalg.h\n"
     ]
    }
   ],
   "source": [
    "%%ccode linalg.h\n",
    "\n",
    "#ifndef _LINALG_\n",
    "#define _LINALG_\n",
    "\n",
    "#include \"ndarray.h\"\n",
    "\n",
    "#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT\n",
    "#define epsilon        1.2e-7\n",
    "#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE\n",
    "#define epsilon        2.3e-16\n",
    "#endif\n",
    "\n",
    "#define JACOBI_MAX     20\n",
    "\n",
    "bool linalg_invert_matrix(mp_float_t *, size_t );\n",
    "mp_obj_t linalg_inv(mp_obj_t );\n",
    "mp_obj_t linalg_dot(mp_obj_t , mp_obj_t );\n",
    "mp_obj_t linalg_zeros(size_t , const mp_obj_t *, mp_map_t *);\n",
    "mp_obj_t linalg_ones(size_t , const mp_obj_t *, mp_map_t *);\n",
    "mp_obj_t linalg_eye(size_t , const mp_obj_t *, mp_map_t *);\n",
    "\n",
    "mp_obj_t linalg_det(mp_obj_t );\n",
    "mp_obj_t linalg_eig(mp_obj_t );\n",
    "\n",
    "#endif"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## linalg.c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-02T20:22:55.259549Z",
     "start_time": "2019-12-02T20:22:55.250088Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "written 15460 bytes to linalg.c\n"
     ]
    }
   ],
   "source": [
    "%%ccode linalg.c\n",
    "\n",
    "#include <stdlib.h>\n",
    "#include <string.h>\n",
    "#include <math.h>\n",
    "#include \"py/obj.h\"\n",
    "#include \"py/runtime.h\"\n",
    "#include \"py/misc.h\"\n",
    "#include \"linalg.h\"\n",
    "\n",
    "bool linalg_invert_matrix(mp_float_t *data, size_t N) {\n",
    "    // returns true, of the inversion was successful, \n",
    "    // false, if the matrix is singular\n",
    "    \n",
    "    // initially, this is the unit matrix: the contents of this matrix is what \n",
    "    // will be returned after all the transformations\n",
    "    mp_float_t *unit = m_new(mp_float_t, N*N);\n",
    "\n",
    "    mp_float_t elem = 1.0;\n",
    "    // initialise the unit matrix\n",
    "    memset(unit, 0, sizeof(mp_float_t)*N*N);\n",
    "    for(size_t m=0; m < N; m++) {\n",
    "        memcpy(&unit[m*(N+1)], &elem, sizeof(mp_float_t));\n",
    "    }\n",
    "    for(size_t m=0; m < N; m++){\n",
    "        // this could be faster with ((c < epsilon) && (c > -epsilon))\n",
    "        if(MICROPY_FLOAT_C_FUN(fabs)(data[m*(N+1)]) < epsilon) {\n",
    "            m_del(mp_float_t, unit, N*N);\n",
    "            return false;\n",
    "        }\n",
    "        for(size_t n=0; n < N; n++){\n",
    "            if(m != n){\n",
    "                elem = data[N*n+m] / data[m*(N+1)];\n",
    "                for(size_t k=0; k < N; k++){\n",
    "                    data[N*n+k] -= elem * data[N*m+k];\n",
    "                    unit[N*n+k] -= elem * unit[N*m+k];\n",
    "                }\n",
    "            }\n",
    "        }\n",
    "    }\n",
    "    for(size_t m=0; m < N; m++){ \n",
    "        elem = data[m*(N+1)];\n",
    "        for(size_t n=0; n < N; n++){\n",
    "            data[N*m+n] /= elem;\n",
    "            unit[N*m+n] /= elem;\n",
    "        }\n",
    "    }\n",
    "    memcpy(data, unit, sizeof(mp_float_t)*N*N);\n",
    "    m_del(mp_float_t, unit, N*N);\n",
    "    return true;\n",
    "}\n",
    "\n",
    "mp_obj_t linalg_inv(mp_obj_t o_in) {\n",
    "    if(!MP_OBJ_IS_TYPE(o_in, &ulab_ndarray_type)) {\n",
    "        mp_raise_TypeError(\"only ndarray objects can be inverted\");\n",
    "    }\n",
    "    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(o_in);\n",
    "    if(ndarray->ndim != 2) {\n",
    "        mp_raise_ValueError(\"only two-dimensional tensors can be inverted\");\n",
    "    }\n",
    "    if(ndarray->shape[0] != ndarray->shape[1]) {\n",
    "        mp_raise_ValueError(\"only square matrices can be inverted\");\n",
    "    }\n",
    "    size_t *shape = m_new(size_t, 2);\n",
    "    shape[0] = shape[1] = ndarray->shape[0];\n",
    "    ndarray_obj_t *inverted = ndarray_new_dense_ndarray(2, shape, NDARRAY_FLOAT);\n",
    "    mp_float_t *data = (mp_float_t *)inverted->array->items;\n",
    "    mp_obj_t elem;\n",
    "    for(size_t m=0; m < ndarray->shape[0]; m++) { // rows first\n",
    "        for(size_t n=0; n < ndarray->shape[1]; n++) { // columns next\n",
    "            // this could, perhaps, be done in single line... \n",
    "            // On the other hand, we probably spend little time here\n",
    "            elem = mp_binary_get_val_array(ndarray->array->typecode, ndarray->array->items, m*ndarray->shape[1]+n);\n",
    "            data[m*ndarray->shape[1]+n] = (mp_float_t)mp_obj_get_float(elem);\n",
    "        }\n",
    "    }\n",
    "    \n",
    "    if(!linalg_invert_matrix(data, ndarray->shape[0])) {\n",
    "        // TODO: I am not sure this is needed here. Otherwise, how should we free up the unused RAM of inverted?\n",
    "        m_del(mp_float_t, inverted->array->items, ndarray->shape[0]*ndarray->shape[1]);\n",
    "        mp_raise_ValueError(\"input matrix is singular\");\n",
    "    }\n",
    "    return MP_OBJ_FROM_PTR(inverted);\n",
    "}\n",
    "\n",
    "mp_obj_t linalg_dot(mp_obj_t _m1, mp_obj_t _m2) {\n",
    "    return mp_const_none;\n",
    "    /*\n",
    "    // TODO: should the results be upcast?\n",
    "    ndarray_obj_t *m1 = MP_OBJ_TO_PTR(_m1);\n",
    "    ndarray_obj_t *m2 = MP_OBJ_TO_PTR(_m2);    \n",
    "    if(m1->n != m2->m) {\n",
    "        mp_raise_ValueError(\"matrix dimensions do not match\");\n",
    "    }\n",
    "    // TODO: numpy uses upcasting here\n",
    "    ndarray_obj_t *out = create_new_ndarray(m1->m, m2->n, NDARRAY_FLOAT);\n",
    "    mp_float_t *outdata = (mp_float_t *)out->array->items;\n",
    "    mp_float_t sum, v1, v2;\n",
    "    for(size_t i=0; i < m1->m; i++) { // rows of m1\n",
    "        for(size_t j=0; j < m2->n; j++) { // columns of m2\n",
    "            sum = 0.0;\n",
    "            for(size_t k=0; k < m2->m; k++) {\n",
    "                // (i, k) * (k, j)\n",
    "                v1 = ndarray_get_float_value(m1->array->items, m1->array->typecode, i*m1->n+k);\n",
    "                v2 = ndarray_get_float_value(m2->array->items, m2->array->typecode, k*m2->n+j);\n",
    "                sum += v1 * v2;\n",
    "            }\n",
    "            outdata[i*m1->m+j] = sum;\n",
    "        }\n",
    "    }\n",
    "    return MP_OBJ_FROM_PTR(out); */\n",
    "} \n",
    "\n",
    "mp_obj_t linalg_zeros_ones(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args, uint8_t kind) {\n",
    "    static const mp_arg_t allowed_args[] = {\n",
    "        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_obj = MP_OBJ_NULL} } ,\n",
    "        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = NDARRAY_FLOAT} },\n",
    "    };\n",
    "\n",
    "    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];\n",
    "    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);\n",
    "    \n",
    "    uint8_t dtype = args[1].u_int;\n",
    "    if(!mp_obj_is_int(args[0].u_obj) && !mp_obj_is_type(args[0].u_obj, &mp_type_tuple)) {\n",
    "        mp_raise_TypeError(\"input argument must be an integer or a tuple\");\n",
    "    }\n",
    "    ndarray_obj_t *ndarray = NULL;\n",
    "    if(mp_obj_is_int(args[0].u_obj)) {\n",
    "        size_t n = mp_obj_get_int(args[0].u_obj);\n",
    "        size_t *shape = m_new(size_t, 1);\n",
    "        shape[0] = n;\n",
    "        ndarray = ndarray_new_dense_ndarray(1, shape, dtype);\n",
    "    } else if(mp_obj_is_type(args[0].u_obj, &mp_type_tuple)) {\n",
    "        mp_obj_tuple_t *tuple = MP_OBJ_TO_PTR(args[0].u_obj);\n",
    "        size_t *shape = m_new(size_t, tuple->len);\n",
    "        for(uint8_t i=0; i < tuple->len; i++) {\n",
    "            shape[i] = mp_obj_get_int(tuple->items[i]);\n",
    "        }\n",
    "        ndarray = ndarray_new_dense_ndarray(tuple->len, shape, dtype);\n",
    "    }\n",
    "    if(kind == 1) {\n",
    "        mp_obj_t one = mp_obj_new_int(1);\n",
    "        for(size_t i=0; i < ndarray->array->len; i++) {\n",
    "            mp_binary_set_val_array(dtype, ndarray->array->items, i, one);\n",
    "        }\n",
    "    }\n",
    "    return MP_OBJ_FROM_PTR(ndarray);\n",
    "}\n",
    "\n",
    "mp_obj_t linalg_zeros(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {\n",
    "    return linalg_zeros_ones(n_args, pos_args, kw_args, 0);\n",
    "}\n",
    "\n",
    "mp_obj_t linalg_ones(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {\n",
    "    return linalg_zeros_ones(n_args, pos_args, kw_args, 1);\n",
    "}\n",
    "\n",
    "mp_obj_t linalg_eye(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {\n",
    "    // TODO: this is a bit more problematic in higher dimensions\n",
    "    return mp_const_none;\n",
    "    /*\n",
    "    static const mp_arg_t allowed_args[] = {\n",
    "        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_INT, {.u_int = 0} },\n",
    "        { MP_QSTR_M, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_PTR(&mp_const_none_obj) } },\n",
    "        { MP_QSTR_k, MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = 0} },        \n",
    "        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = NDARRAY_FLOAT} },\n",
    "    };\n",
    "\n",
    "    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];\n",
    "    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);\n",
    "\n",
    "    size_t n = args[0].u_int, m;\n",
    "    int16_t k = args[2].u_int;\n",
    "    uint8_t dtype = args[3].u_int;\n",
    "    if(args[1].u_rom_obj == mp_const_none) {\n",
    "        m = n;\n",
    "    } else {\n",
    "        m = mp_obj_get_int(args[1].u_rom_obj);\n",
    "    }\n",
    "    \n",
    "    ndarray_obj_t *ndarray = create_new_ndarray(m, n, dtype);\n",
    "    mp_obj_t one = mp_obj_new_int(1);\n",
    "    size_t i = 0;\n",
    "    if((k >= 0) && (k < n)) {\n",
    "        while(k < n) {\n",
    "            mp_binary_set_val_array(dtype, ndarray->array->items, i*n+k, one);\n",
    "            k++;\n",
    "            i++;\n",
    "        }\n",
    "    } else if((k < 0) && (-k < m)) {\n",
    "        k = -k;\n",
    "        i = 0;\n",
    "        while(k < m) {\n",
    "            mp_binary_set_val_array(dtype, ndarray->array->items, k*n+i, one);\n",
    "            k++;\n",
    "            i++;\n",
    "        }\n",
    "    }\n",
    "    return MP_OBJ_FROM_PTR(ndarray); */\n",
    "}\n",
    "\n",
    "mp_obj_t linalg_det(mp_obj_t oin) {\n",
    "    if(!mp_obj_is_type(oin, &ulab_ndarray_type)) {\n",
    "        mp_raise_TypeError(\"function defined for ndarrays only\");\n",
    "    }\n",
    "    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(oin);\n",
    "    if(ndarray->ndim != 2) {\n",
    "        mp_raise_ValueError(\"only two-dimensional tensors can be inverted\");\n",
    "    }\n",
    "    if(ndarray->shape[0] != ndarray->shape[1]) {\n",
    "        mp_raise_ValueError(\"only square matrices can be inverted\");\n",
    "    }\n",
    "    \n",
    "    mp_float_t *tmp = m_new(mp_float_t, ndarray->shape[0]*ndarray->shape[1]);\n",
    "    // TODO: this won't work for sliced arrays\n",
    "    for(size_t i=0; i < ndarray->len; i++){\n",
    "        tmp[i] = ndarray_get_float_value(ndarray->array->items, ndarray->array->typecode, i);\n",
    "    }\n",
    "    mp_float_t c;\n",
    "    for(size_t m=0; m < ndarray->shape[0]-1; m++){\n",
    "        if(MICROPY_FLOAT_C_FUN(fabs)(tmp[m*(ndarray->shape[1]+1)]) < epsilon) {\n",
    "            m_del(mp_float_t, tmp, ndarray->shape[0]*ndarray->shape[1]);\n",
    "            return mp_obj_new_float(0.0);\n",
    "        }\n",
    "        for(size_t n=0; n < ndarray->shape[1]; n++){\n",
    "            if(m != n) {\n",
    "                c = tmp[ndarray->shape[0]*n+m] / tmp[m*(ndarray->shape[1]+1)];\n",
    "                for(size_t k=0; k < ndarray->shape[1]; k++){\n",
    "                    tmp[ndarray->shape[1]*n+k] -= c * tmp[ndarray->shape[1]*m+k];\n",
    "                }\n",
    "            }\n",
    "        }\n",
    "    }\n",
    "    mp_float_t det = 1.0;\n",
    "                            \n",
    "    for(size_t m=0; m < ndarray->shape[0]; m++){ \n",
    "        det *= tmp[m*(ndarray->shape[1]+1)];\n",
    "    }\n",
    "    m_del(mp_float_t, tmp, ndarray->shape[0]*ndarray->shape[1]);\n",
    "    return mp_obj_new_float(det);\n",
    "}\n",
    "\n",
    "mp_obj_t linalg_eig(mp_obj_t oin) {\n",
    "    if(!mp_obj_is_type(oin, &ulab_ndarray_type)) {\n",
    "        mp_raise_TypeError(\"function defined for ndarrays only\");\n",
    "    }\n",
    "    ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(oin);\n",
    "    if(ndarray->ndim != 2) {\n",
    "        mp_raise_ValueError(\"only two-dimensional tensors can be inverted\");\n",
    "    }\n",
    "    if(ndarray->shape[0] != ndarray->shape[1]) {\n",
    "        mp_raise_ValueError(\"only square matrices can be inverted\");\n",
    "    }\n",
    "    mp_float_t *array = m_new(mp_float_t, ndarray->len);\n",
    "    // TODO: this won't work for sliced arrays\n",
    "    for(size_t i=0; i < ndarray->len; i++) {\n",
    "        array[i] = ndarray_get_float_value(ndarray->array->items, ndarray->array->typecode, i);\n",
    "    }\n",
    "    // make sure the matrix is symmetric\n",
    "    for(size_t m=0; m < ndarray->shape[0]; m++) {\n",
    "        for(size_t n=m+1; n < ndarray->shape[1]; n++) {\n",
    "            // compare entry (m, n) to (n, m)\n",
    "            // TODO: this must probably be scaled!\n",
    "            if(epsilon < MICROPY_FLOAT_C_FUN(fabs)(array[m*ndarray->shape[0] + n] - array[n*ndarray->shape[0] + m])) {\n",
    "                mp_raise_ValueError(\"input matrix is asymmetric\");\n",
    "            }\n",
    "        }\n",
    "    }\n",
    "    \n",
    "    // if we got this far, then the matrix will be symmetric\n",
    "    size_t *shape = m_new(size_t, 2);\n",
    "    shape[0] = shape[1] = ndarray->shape[0];\n",
    "    ndarray_obj_t *eigenvectors = ndarray_new_dense_ndarray(2, shape, NDARRAY_FLOAT);\n",
    "    mp_float_t *eigvectors = (mp_float_t *)eigenvectors->array->items;\n",
    "    // start out with the unit matrix\n",
    "    for(size_t m=0; m < ndarray->shape[0]; m++) {\n",
    "        eigvectors[m*(ndarray->shape[1]+1)] = 1.0;\n",
    "    }\n",
    "    mp_float_t largest, w, t, c, s, tau, aMk, aNk, vm, vn;\n",
    "    size_t M, N;\n",
    "    size_t iterations = JACOBI_MAX*ndarray->shape[0]*ndarray->shape[0];\n",
    "    do {\n",
    "        iterations--;\n",
    "        // find the pivot here\n",
    "        M = 0;\n",
    "        N = 0;\n",
    "        largest = 0.0;\n",
    "        for(size_t m=0; m < ndarray->shape[0]-1; m++) { // -1: no need to inspect last row\n",
    "            for(size_t n=m+1; n < ndarray->shape[0]; n++) {\n",
    "                w = MICROPY_FLOAT_C_FUN(fabs)(array[m*ndarray->shape[0] + n]);\n",
    "                if((largest < w) && (epsilon < w)) {\n",
    "                    M = m;\n",
    "                    N = n;\n",
    "                    largest = w;\n",
    "                }\n",
    "            }\n",
    "        }\n",
    "        if(M+N == 0) { // all entries are smaller than epsilon, there is not much we can do...\n",
    "            break;\n",
    "        }\n",
    "        // at this point, we have the pivot, and it is the entry (M, N)\n",
    "        // now we have to find the rotation angle\n",
    "        w = (array[N*ndarray->shape[0] + N] - array[M*ndarray->shape[0] + M]) / (2.0*array[M*ndarray->shape[0] + N]);\n",
    "        // The following if/else chooses the smaller absolute value for the tangent \n",
    "        // of the rotation angle. Going with the smaller should be numerically stabler.\n",
    "        if(w > 0) {\n",
    "            t = MICROPY_FLOAT_C_FUN(sqrt)(w*w + 1.0) - w;\n",
    "        } else {\n",
    "            t = -1.0*(MICROPY_FLOAT_C_FUN(sqrt)(w*w + 1.0) + w);\n",
    "        }\n",
    "        s = t / MICROPY_FLOAT_C_FUN(sqrt)(t*t + 1.0); // the sine of the rotation angle\n",
    "        c = 1.0 / MICROPY_FLOAT_C_FUN(sqrt)(t*t + 1.0); // the cosine of the rotation angle\n",
    "        tau = (1.0-c)/s; // this is equal to the tangent of the half of the rotation angle\n",
    "        \n",
    "        // at this point, we have the rotation angles, so we can transform the matrix\n",
    "        // first the two diagonal elements\n",
    "        // a(M, M) = a(M, M) - t*a(M, N)\n",
    "        array[M*ndarray->shape[0] + M] = array[M*ndarray->shape[0] + M] - t * array[M*ndarray->shape[0] + N];\n",
    "        // a(N, N) = a(N, N) + t*a(M, N)\n",
    "        array[N*ndarray->shape[0] + N] = array[N*ndarray->shape[0] + N] + t * array[M*ndarray->shape[0] + N];\n",
    "        // after the rotation, the a(M, N), and a(N, M) entries should become zero\n",
    "        array[M*ndarray->shape[0] + N] = array[N*ndarray->shape[0] + M] = 0.0;\n",
    "        // then all other elements in the column\n",
    "        for(size_t k=0; k < ndarray->shape[0]; k++) {\n",
    "            if((k == M) || (k == N)) {\n",
    "                continue;\n",
    "            }\n",
    "            aMk = array[M*ndarray->shape[0] + k];\n",
    "            aNk = array[N*ndarray->shape[0] + k];\n",
    "            // a(M, k) = a(M, k) - s*(a(N, k) + tau*a(M, k))\n",
    "            array[M*ndarray->shape[0] + k] -= s*(aNk + tau*aMk);\n",
    "            // a(N, k) = a(N, k) + s*(a(M, k) - tau*a(N, k))\n",
    "            array[N*ndarray->shape[0] + k] += s*(aMk - tau*aNk);\n",
    "            // a(k, M) = a(M, k)\n",
    "            array[k*ndarray->shape[0] + M] = array[M*ndarray->shape[0] + k];\n",
    "            // a(k, N) = a(N, k)\n",
    "            array[k*ndarray->shape[0] + N] = array[N*ndarray->shape[0] + k];\n",
    "        }\n",
    "        // now we have to update the eigenvectors\n",
    "        // the rotation matrix, R, multiplies from the right\n",
    "        // R is the unit matrix, except for the \n",
    "        // R(M,M) = R(N, N) = c\n",
    "        // R(N, M) = s\n",
    "        // (M, N) = -s\n",
    "        // entries. This means that only the Mth, and Nth columns will change\n",
    "        for(size_t m=0; m < ndarray->shape[0]; m++) {\n",
    "            vm = eigvectors[m*ndarray->shape[0]+M];\n",
    "            vn = eigvectors[m*ndarray->shape[0]+N];\n",
    "            // the new value of eigvectors(m, M)\n",
    "            eigvectors[m*ndarray->shape[0]+M] = c * vm - s * vn;\n",
    "            // the new value of eigvectors(m, N)\n",
    "            eigvectors[m*ndarray->shape[0]+N] = s * vm + c * vn;\n",
    "        }\n",
    "    } while(iterations > 0);\n",
    "    \n",
    "    if(iterations == 0) { \n",
    "        // the computation did not converge; numpy raises LinAlgError\n",
    "        m_del(mp_float_t, array, ndarray->len);\n",
    "        mp_raise_ValueError(\"iterations did not converge\");\n",
    "    }\n",
    "    size_t *eigen_shape = m_new(size_t, 1);\n",
    "    eigen_shape[0] = ndarray->shape[0];\n",
    "    ndarray_obj_t *eigenvalues = ndarray_new_dense_ndarray(1, eigen_shape, NDARRAY_FLOAT);\n",
    "    mp_float_t *eigvalues = (mp_float_t *)eigenvalues->array->items;\n",
    "    for(size_t i=0; i < ndarray->shape[0]; i++) {\n",
    "        eigvalues[i] = array[i*(ndarray->shape[0]+1)];\n",
    "    }\n",
    "    m_del(mp_float_t, array, ndarray->len);\n",
    "    \n",
    "    mp_obj_tuple_t *tuple = MP_OBJ_TO_PTR(mp_obj_new_tuple(2, NULL));\n",
    "    tuple->items[0] = MP_OBJ_FROM_PTR(eigenvalues);\n",
    "    tuple->items[1] = MP_OBJ_FROM_PTR(eigenvectors);\n",
    "    return tuple;\n",
    "    return MP_OBJ_FROM_PTR(eigenvalues);\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Polynomial"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## poly.h"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-03T19:48:28.659765Z",
     "start_time": "2019-12-03T19:48:28.644063Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "written 315 bytes to poly.h\n"
     ]
    }
   ],
   "source": [
    "%%ccode poly.h\n",
    "\n",
    "#ifndef _POLY_\n",
    "#define _POLY_\n",
    "\n",
    "mp_obj_t poly_polyval(mp_obj_t , mp_obj_t );\n",
    "mp_obj_t poly_polyfit(size_t  , const mp_obj_t *);\n",
    "\n",
    "#endif"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## poly.c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-03T20:01:04.824131Z",
     "start_time": "2019-12-03T20:01:04.818722Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "written 5781 bytes to poly.c\n"
     ]
    }
   ],
   "source": [
    "%%ccode poly.c\n",
    "\n",
    "#include \"py/obj.h\"\n",
    "#include \"py/runtime.h\"\n",
    "#include \"py/objarray.h\"\n",
    "#include \"ndarray.h\"\n",
    "#include \"linalg.h\"\n",
    "#include \"poly.h\"\n",
    "\n",
    "void fill_array_iterable(mp_float_t *array, mp_obj_t oin) {\n",
    "    mp_obj_iter_buf_t buf;\n",
    "    mp_obj_t item, iterable = mp_getiter(oin, &buf);\n",
    "    while((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {\n",
    "        *array++ = mp_obj_get_float(item);\n",
    "    }\n",
    "}\n",
    "\n",
    "mp_obj_t poly_polyval(mp_obj_t o_p, mp_obj_t o_x) {\n",
    "    // we always return floats: polynomials are going to be of type float, except, \n",
    "    // when both the coefficients and the independent variable are integers; \n",
    "    uint8_t plen = mp_obj_get_int(mp_obj_len_maybe(o_p));\n",
    "    mp_float_t *p = m_new(mp_float_t, plen);\n",
    "    fill_array_iterable(p, o_p);\n",
    "    ndarray_obj_t *ndarray;\n",
    "    mp_float_t *array;\n",
    "    if(MP_OBJ_IS_TYPE(o_x, &ulab_ndarray_type)) {\n",
    "        ndarray_obj_t *input = MP_OBJ_TO_PTR(o_x);\n",
    "        ndarray = ndarray_copy_view(input, NDARRAY_FLOAT);\n",
    "        array = (mp_float_t *)ndarray->array->items;\n",
    "    } else { // at this point, we should have a 1-D iterable\n",
    "        size_t len = mp_obj_get_int(mp_obj_len_maybe(o_x));\n",
    "        ndarray = ndarray_new_linear_array(len, NDARRAY_FLOAT);\n",
    "        array = (mp_float_t *)ndarray->array->items;\n",
    "        fill_array_iterable(array, o_x);\n",
    "    }\n",
    "    mp_float_t x, y;\n",
    "    for(size_t i=0; i < ndarray->len; i++) {\n",
    "        x = array[i];\n",
    "        y = p[0];\n",
    "        for(uint8_t j=0; j < plen-1; j++) {\n",
    "            y *= x;\n",
    "            y += p[j+1];\n",
    "        }\n",
    "        array[i] = y;\n",
    "    }\n",
    "    m_del(mp_float_t, p, plen);\n",
    "    return MP_OBJ_FROM_PTR(ndarray);\n",
    "}\n",
    "\n",
    "mp_obj_t poly_polyfit(size_t n_args, const mp_obj_t *args) {\n",
    "    if(n_args != 3) {\n",
    "        mp_raise_ValueError(\"number of arguments must be 3\");\n",
    "    }\n",
    "    if(!MP_OBJ_IS_TYPE(args[0], &ulab_ndarray_type) && !MP_OBJ_IS_TYPE(args[0], &mp_type_tuple) &&\n",
    "      !MP_OBJ_IS_TYPE(args[0], &mp_type_list) && !MP_OBJ_IS_TYPE(args[0], &mp_type_range) &&\n",
    "      !MP_OBJ_IS_TYPE(args[1], &ulab_ndarray_type) && !MP_OBJ_IS_TYPE(args[1], &mp_type_tuple) &&\n",
    "      !MP_OBJ_IS_TYPE(args[1], &mp_type_list) && !MP_OBJ_IS_TYPE(args[1], &mp_type_range)) {\n",
    "        mp_raise_ValueError(\"input data must be a 1D iterable\");\n",
    "    }\n",
    "    uint16_t lenx, leny;\n",
    "    uint8_t deg;\n",
    "    mp_float_t *x, *XT, *y, *prod;\n",
    "\n",
    "    lenx = (uint16_t)mp_obj_get_int(mp_obj_len_maybe(args[0]));\n",
    "    leny = (uint16_t)mp_obj_get_int(mp_obj_len_maybe(args[1]));\n",
    "    if(lenx != leny) {\n",
    "        mp_raise_ValueError(\"input vectors must be of equal length\");\n",
    "    }\n",
    "    deg = (uint8_t)mp_obj_get_int(args[2]);\n",
    "    if(leny < deg) {\n",
    "        mp_raise_ValueError(\"more degrees of freedom than data points\");\n",
    "    }\n",
    "    x = m_new(mp_float_t, lenx);\n",
    "    fill_array_iterable(x, args[0]);\n",
    "    y = m_new(mp_float_t, leny);\n",
    "    fill_array_iterable(y, args[1]);\n",
    "    \n",
    "    // one could probably express X as a function of XT, \n",
    "    // and thereby save RAM, because X is used only in the product\n",
    "    XT = m_new(mp_float_t, (deg+1)*leny); // XT is a matrix of shape (deg+1, len) (rows, columns)\n",
    "    for(uint8_t i=0; i < leny; i++) { // column index\n",
    "        XT[i+0*lenx] = 1.0; // top row\n",
    "        for(uint8_t j=1; j < deg+1; j++) { // row index\n",
    "            XT[i+j*leny] = XT[i+(j-1)*leny]*x[i];\n",
    "        }\n",
    "    }\n",
    "    \n",
    "    prod = m_new(mp_float_t, (deg+1)*(deg+1)); // the product matrix is of shape (deg+1, deg+1)\n",
    "    mp_float_t sum;\n",
    "    for(uint16_t i=0; i < deg+1; i++) { // column index\n",
    "        for(uint16_t j=0; j < deg+1; j++) { // row index\n",
    "            sum = 0.0;\n",
    "            for(size_t k=0; k < lenx; k++) {\n",
    "                // (j, k) * (k, i) \n",
    "                // Note that the second matrix is simply the transpose of the first: \n",
    "                // X(k, i) = XT(i, k) = XT[k*lenx+i]\n",
    "                sum += XT[j*lenx+k]*XT[i*lenx+k]; // X[k*(deg+1)+i];\n",
    "            }\n",
    "            prod[j*(deg+1)+i] = sum;\n",
    "        }\n",
    "    }\n",
    "    if(!linalg_invert_matrix(prod, deg+1)) {\n",
    "        // Although X was a Vandermonde matrix, whose inverse is guaranteed to exist, \n",
    "        // we bail out here, if prod couldn't be inverted: if the values in x are not all \n",
    "        // distinct, prod is singular\n",
    "        m_del(mp_float_t, XT, (deg+1)*lenx);\n",
    "        m_del(mp_float_t, x, lenx);\n",
    "        m_del(mp_float_t, y, lenx);\n",
    "        m_del(mp_float_t, prod, (deg+1)*(deg+1));\n",
    "        mp_raise_ValueError(\"could not invert Vandermonde matrix\");\n",
    "    } \n",
    "    // at this point, we have the inverse of X^T * X\n",
    "    // y is a column vector; x is free now, we can use it for storing intermediate values\n",
    "    for(uint16_t i=0; i < deg+1; i++) { // row index\n",
    "        sum = 0.0;\n",
    "        for(uint16_t j=0; j < lenx; j++) { // column index\n",
    "            sum += XT[i*lenx+j]*y[j];\n",
    "        }\n",
    "        x[i] = sum;\n",
    "    }\n",
    "    // XT is no longer needed\n",
    "    m_del(mp_float_t, XT, (deg+1)*leny);\n",
    "    \n",
    "    ndarray_obj_t *beta = ndarray_new_linear_array(deg+1, NDARRAY_FLOAT);\n",
    "    mp_float_t *betav = (mp_float_t *)beta->array->items;\n",
    "    // x[0..(deg+1)] contains now the product X^T * y; we can get rid of y\n",
    "    m_del(float, y, leny);\n",
    "    \n",
    "    // now, we calculate beta, i.e., we apply prod = (X^T * X)^(-1) on x = X^T * y; x is a column vector now\n",
    "    for(uint16_t i=0; i < deg+1; i++) {\n",
    "        sum = 0.0;\n",
    "        for(uint16_t j=0; j < deg+1; j++) {\n",
    "            sum += prod[i*(deg+1)+j]*x[j];\n",
    "        }\n",
    "        betav[i] = sum;\n",
    "    }\n",
    "    m_del(mp_float_t, x, lenx);\n",
    "    m_del(mp_float_t, prod, (deg+1)*(deg+1));\n",
    "    for(uint8_t i=0; i < (deg+1)/2; i++) {\n",
    "        // We have to reverse the array, for the leading coefficient comes first. \n",
    "        SWAP(mp_float_t, betav[i], betav[deg-i]);\n",
    "    }\n",
    "    return MP_OBJ_FROM_PTR(beta);\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Numerical"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## numerical.h"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 456,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-12T21:08:05.130728Z",
     "start_time": "2019-12-12T21:08:05.124952Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "written 2211 bytes to numerical.h\n"
     ]
    }
   ],
   "source": [
    "%%ccode numerical.h\n",
    "\n",
    "#ifndef _NUMERICAL_\n",
    "#define _NUMERICAL_\n",
    "\n",
    "#include \"ndarray.h\"\n",
    "\n",
    "mp_obj_t numerical_linspace(size_t , const mp_obj_t *, mp_map_t *);\n",
    "\n",
    "mp_obj_t numerical_sum(size_t , const mp_obj_t *, mp_map_t *);\n",
    "mp_obj_t numerical_mean(size_t , const mp_obj_t *, mp_map_t *);\n",
    "mp_obj_t numerical_std(size_t , const mp_obj_t *, mp_map_t *);\n",
    "\n",
    "#define CALCULATE_SUM(ndarray, type, farray, shape_ax, index, stride, offset, optype) do {\\\n",
    "    type *array = (type *)(ndarray)->array->items;\\\n",
    "    (farray)[(index)] = 0.0;\\\n",
    "    for(size_t j=0; j < (shape_ax); j++, (offset) += (stride)) {\\\n",
    "        (farray)[(index)] += array[(offset)];\\\n",
    "    }\\\n",
    "} while(0)\n",
    "\n",
    "// TODO: this can be done without the NDARRAY_INDEX_FROM_FLAT macro\n",
    "// Welford algorithm for the standard deviation\n",
    "#define CALCULATE_FLAT_SUM_STD(ndarray, type, value, shape_strides, optype) do {\\\n",
    "    type *array = (type *)(ndarray)->array->items;\\\n",
    "    (value) = 0.0;\\\n",
    "    mp_float_t m = 0.0, mtmp;\\\n",
    "    size_t index, nindex;\\\n",
    "    for(size_t j=0; j < (ndarray)->len; j++) {\\\n",
    "        NDARRAY_INDEX_FROM_FLAT((ndarray), (shape_strides), j, index, nindex);\\\n",
    "        if((optype) == NUMERICAL_STD) {\\\n",
    "            mtmp = m;\\\n",
    "            m = mtmp + (array[nindex] - mtmp) / (j+1);\\\n",
    "            (value) += (array[nindex] - mtmp) * (array[nindex] - m);\\\n",
    "        } else {\\\n",
    "            (value) += array[nindex];\\\n",
    "        }\\\n",
    "    }\\\n",
    "} while(0)\n",
    "\n",
    "// we calculate the standard deviation in two passes, in order to avoid negative values through truncation errors\n",
    "// We could do in a single pass, if we resorted to the Welford algorithm above\n",
    "#define CALCULATE_STD(ndarray, type, sq_sum, shape_ax, stride, offset) do {\\\n",
    "    type *array = (type *)(ndarray)->array->items;\\\n",
    "    mp_float_t x, ave = 0.0;\\\n",
    "    (sq_sum) = 0.0;\\\n",
    "    size_t j, _offset = (offset);\\\n",
    "    for(j=0; j < (shape_ax); j++, _offset += (stride)) {\\\n",
    "        ave += array[_offset];\\\n",
    "    }\\\n",
    "    ave /= j;\\\n",
    "    for(j=0; j < (shape_ax); j++, (offset) += (stride)) {\\\n",
    "        x = array[(offset)] - ave;\\\n",
    "        (sq_sum) += x * x;\\\n",
    "    }\\\n",
    "} while(0)\n",
    "\n",
    "#endif"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## numerical.c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 203,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-05T19:52:28.670813Z",
     "start_time": "2019-12-05T19:52:28.650669Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "written 14314 bytes to numerical.c\n"
     ]
    }
   ],
   "source": [
    "%%ccode numerical.c\n",
    "\n",
    "#include <math.h>\n",
    "#include <stdlib.h>\n",
    "#include <string.h>\n",
    "#include \"py/obj.h\"\n",
    "#include \"py/objint.h\"\n",
    "#include \"py/runtime.h\"\n",
    "#include \"py/builtin.h\"\n",
    "#include \"py/misc.h\"\n",
    "#include \"numerical.h\"\n",
    "\n",
    "enum NUMERICAL_FUNCTION_TYPE {\n",
    "    NUMERICAL_MIN,\n",
    "    NUMERICAL_MAX,\n",
    "    NUMERICAL_ARGMIN,\n",
    "    NUMERICAL_ARGMAX,\n",
    "    NUMERICAL_SUM,\n",
    "    NUMERICAL_MEAN,\n",
    "    NUMERICAL_STD,\n",
    "};\n",
    "\n",
    "// creates the shape and strides arrays of the contracted ndarray\n",
    "ndarray_header_obj_t contracted_shape_strides(ndarray_obj_t *ndarray, int8_t axis) {\n",
    "    if(axis < 0) axis += ndarray->ndim;\n",
    "    if((axis > ndarray->ndim-1) || (axis < 0)) {\n",
    "        mp_raise_ValueError(\"tuple index out of range\");\n",
    "    }\n",
    "    size_t *shape = m_new(size_t, ndarray->ndim-1);\n",
    "    int32_t *strides = m_new(int32_t, ndarray->ndim-1);\n",
    "    for(size_t i=0, j=0; i < ndarray->ndim; i++) {\n",
    "        if(axis != i) {\n",
    "            shape[j] = ndarray->shape[j];\n",
    "            j++;\n",
    "        }\n",
    "    }\n",
    "    int32_t stride = 1;\n",
    "    for(size_t i=0; i < ndarray->ndim-1; i++) {\n",
    "        strides[ndarray->ndim-2-i] = stride;\n",
    "        stride *= shape[ndarray->ndim-2-i];\n",
    "    }\n",
    "    ndarray_header_obj_t header;\n",
    "    header.shape = shape;\n",
    "    header.strides = strides;\n",
    "    header.axis = axis;\n",
    "    return header;\n",
    "}\n",
    "\n",
    "mp_obj_t numerical_linspace(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {\n",
    "    static const mp_arg_t allowed_args[] = {\n",
    "        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_PTR(&mp_const_none_obj) } },\n",
    "        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_PTR(&mp_const_none_obj) } },\n",
    "        { MP_QSTR_num, MP_ARG_INT, {.u_int = 50} },\n",
    "        { MP_QSTR_endpoint, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_PTR(&mp_const_true_obj)} },\n",
    "        { MP_QSTR_retstep, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_PTR(&mp_const_false_obj)} },\n",
    "        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = NDARRAY_FLOAT} },\n",
    "    };\n",
    "\n",
    "    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];\n",
    "    mp_arg_parse_all(2, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);\n",
    "\n",
    "    uint16_t len = args[2].u_int;\n",
    "    if(len < 2) {\n",
    "        mp_raise_ValueError(\"number of points must be at least 2\");\n",
    "    }\n",
    "    mp_float_t value, step;\n",
    "    value = mp_obj_get_float(args[0].u_obj);\n",
    "    uint8_t typecode = args[5].u_int;\n",
    "    if(args[3].u_obj == mp_const_true) step = (mp_obj_get_float(args[1].u_obj)-value)/(len-1);\n",
    "    else step = (mp_obj_get_float(args[1].u_obj)-value)/len;\n",
    "    ndarray_obj_t *ndarray = ndarray_new_linear_array(len, typecode);\n",
    "    if(typecode == NDARRAY_UINT8) {\n",
    "        uint8_t *array = (uint8_t *)ndarray->array->items;\n",
    "        for(size_t i=0; i < len; i++, value += step) array[i] = (uint8_t)value;\n",
    "    } else if(typecode == NDARRAY_INT8) {\n",
    "        int8_t *array = (int8_t *)ndarray->array->items;\n",
    "        for(size_t i=0; i < len; i++, value += step) array[i] = (int8_t)value;\n",
    "    } else if(typecode == NDARRAY_UINT16) {\n",
    "        uint16_t *array = (uint16_t *)ndarray->array->items;\n",
    "        for(size_t i=0; i < len; i++, value += step) array[i] = (uint16_t)value;\n",
    "    } else if(typecode == NDARRAY_INT16) {\n",
    "        int16_t *array = (int16_t *)ndarray->array->items;\n",
    "        for(size_t i=0; i < len; i++, value += step) array[i] = (int16_t)value;\n",
    "    } else {\n",
    "        mp_float_t *array = (mp_float_t *)ndarray->array->items;\n",
    "        for(size_t i=0; i < len; i++, value += step) array[i] = value;\n",
    "    }\n",
    "    if(args[4].u_obj == mp_const_false) {\n",
    "        return MP_OBJ_FROM_PTR(ndarray);\n",
    "    } else {\n",
    "        mp_obj_t tuple[2];\n",
    "        tuple[0] = ndarray;\n",
    "        tuple[1] = mp_obj_new_float(step);\n",
    "        return mp_obj_new_tuple(2, tuple);\n",
    "    }\n",
    "}\n",
    "\n",
    "mp_obj_t numerical_flat_sum_mean_std(ndarray_obj_t *ndarray, uint8_t optype, size_t ddof) {\n",
    "    mp_float_t value;\n",
    "    int32_t *shape_strides = m_new(int32_t, ndarray->ndim);\n",
    "    shape_strides[ndarray->ndim-1] = ndarray->strides[ndarray->ndim-1];\n",
    "    for(uint8_t i=ndarray->ndim-1; i > 0; i--) {\n",
    "        shape_strides[i-1] = shape_strides[i] * ndarray->shape[i-1];\n",
    "    }\n",
    "    if(ndarray->array->typecode == NDARRAY_UINT8) {\n",
    "        CALCULATE_FLAT_SUM_STD(ndarray, uint8_t, value, shape_strides, optype);\n",
    "    } else if(ndarray->array->typecode == NDARRAY_INT8) {\n",
    "        CALCULATE_FLAT_SUM_STD(ndarray, int8_t, value, shape_strides, optype);\n",
    "    } if(ndarray->array->typecode == NDARRAY_UINT16) {\n",
    "        CALCULATE_FLAT_SUM_STD(ndarray, uint16_t, value, shape_strides, optype);\n",
    "    } else if(ndarray->array->typecode == NDARRAY_INT16) {\n",
    "        CALCULATE_FLAT_SUM_STD(ndarray, int16_t, value, shape_strides, optype);\n",
    "    } else {\n",
    "        CALCULATE_FLAT_SUM_STD(ndarray, mp_float_t, value, shape_strides, optype);\n",
    "    }\n",
    "    m_del(int32_t, shape_strides, ndarray->ndim);\n",
    "    if(optype == NUMERICAL_SUM) {\n",
    "        return mp_obj_new_float(value);\n",
    "    } else if(optype == NUMERICAL_MEAN) {\n",
    "        return mp_obj_new_float(value/ndarray->len);\n",
    "    } else {\n",
    "        return mp_obj_new_float(MICROPY_FLOAT_C_FUN(sqrt)(value/(ndarray->len-ddof)));\n",
    "    }\n",
    "}   \n",
    "\n",
    "// numerical functions for ndarrays\n",
    "mp_obj_t numerical_sum_mean_ndarray(ndarray_obj_t *ndarray, mp_obj_t axis, uint8_t optype) {\n",
    "    if(axis == mp_const_none) {\n",
    "        return numerical_flat_sum_mean_std(ndarray, optype, 0);\n",
    "    } else {\n",
    "        int8_t ax = mp_obj_get_int(axis);\n",
    "        ndarray_header_obj_t header = contracted_shape_strides(ndarray, ax);\n",
    "        ndarray_obj_t *result = ndarray_new_ndarray(ndarray->ndim-1, header.shape, header.strides, NDARRAY_FLOAT);\n",
    "        mp_float_t *farray = (mp_float_t *)result->array->items;\n",
    "        size_t offset;\n",
    "        // iterate along the length of the output array, so as to avoid recursion\n",
    "        for(size_t i=0; i < result->array->len; i++) {\n",
    "            offset = ndarray_index_from_contracted(i, ndarray, result->strides, result->ndim, header.axis) + ndarray->offset;\n",
    "            if(ndarray->array->typecode == NDARRAY_UINT8) {\n",
    "                CALCULATE_SUM(ndarray, uint8_t, farray, ndarray->shape[header.axis], i, ndarray->strides[header.axis], offset, optype);\n",
    "            } else if(ndarray->array->typecode == NDARRAY_INT8) {\n",
    "                CALCULATE_SUM(ndarray, int8_t, farray, ndarray->shape[header.axis], i, ndarray->strides[header.axis], offset, optype); \n",
    "            } else if(ndarray->array->typecode == NDARRAY_UINT16) {\n",
    "                CALCULATE_SUM(ndarray, uint16_t, farray, ndarray->shape[header.axis], i, ndarray->strides[header.axis], offset, optype);     \n",
    "            } else if(ndarray->array->typecode == NDARRAY_INT16) {\n",
    "                CALCULATE_SUM(ndarray, int16_t, farray, ndarray->shape[header.axis], i, ndarray->strides[header.axis], offset, optype);\n",
    "            } else {\n",
    "                CALCULATE_SUM(ndarray, mp_float_t, farray, ndarray->shape[header.axis], i, ndarray->strides[header.axis], offset, optype);       \n",
    "            }\n",
    "            if(optype == NUMERICAL_MEAN) farray[i] /= ndarray->shape[header.axis];\n",
    "        }\n",
    "        return MP_OBJ_FROM_PTR(result);\n",
    "    }\n",
    "    return mp_const_none;\n",
    "}\n",
    "\n",
    "mp_obj_t numerical_argmin_argmax_ndarray(ndarray_obj_t *ndarray, mp_obj_t axis, uint8_t optype) {\n",
    "    return mp_const_none;\n",
    "}\n",
    "\n",
    "// numerical function for interables (single axis)\n",
    "mp_obj_t numerical_argmin_argmax_iterable(mp_obj_t oin, uint8_t optype) {\n",
    "    size_t idx = 0, best_idx = 0;\n",
    "    mp_obj_iter_buf_t iter_buf;\n",
    "    mp_obj_t iterable = mp_getiter(oin, &iter_buf);\n",
    "    mp_obj_t best_obj = MP_OBJ_NULL;\n",
    "    mp_obj_t item;\n",
    "    mp_uint_t op = MP_BINARY_OP_LESS;\n",
    "    if((optype == NUMERICAL_ARGMAX) || (optype == NUMERICAL_MAX)) op = MP_BINARY_OP_MORE;\n",
    "    while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {\n",
    "        if ((best_obj == MP_OBJ_NULL) || (mp_binary_op(op, item, best_obj) == mp_const_true)) {\n",
    "            best_obj = item;\n",
    "            best_idx = idx;\n",
    "        }\n",
    "        idx++;\n",
    "    }\n",
    "    if((optype == NUMERICAL_ARGMIN) || (optype == NUMERICAL_ARGMAX)) {\n",
    "        return MP_OBJ_NEW_SMALL_INT(best_idx);\n",
    "    } else {\n",
    "        return best_obj;\n",
    "    }    \n",
    "}\n",
    "\n",
    "mp_obj_t numerical_sum_mean_std_iterable(mp_obj_t oin, uint8_t optype, size_t ddof) {\n",
    "    mp_float_t value, sum = 0.0, sq_sum = 0.0;\n",
    "    mp_obj_iter_buf_t iter_buf;\n",
    "    mp_obj_t item, iterable = mp_getiter(oin, &iter_buf);\n",
    "    mp_int_t len = mp_obj_get_int(mp_obj_len(oin));\n",
    "    while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {\n",
    "        value = mp_obj_get_float(item);\n",
    "        sum += value;\n",
    "    }\n",
    "    if(optype ==  NUMERICAL_SUM) {\n",
    "        return mp_obj_new_float(sum);\n",
    "    } else if(optype == NUMERICAL_MEAN) {\n",
    "        return mp_obj_new_float(sum/len);\n",
    "    } else { // this should be the case of the standard deviation\n",
    "        sum /= len; // this is the mean now\n",
    "        iterable = mp_getiter(oin, &iter_buf);\n",
    "        while ((item = mp_iternext(iterable)) != MP_OBJ_STOP_ITERATION) {\n",
    "            value = mp_obj_get_float(item) - sum;\n",
    "            sq_sum += value * value;\n",
    "        }\n",
    "        return mp_obj_new_float(MICROPY_FLOAT_C_FUN(sqrt)(sq_sum/(len-ddof)));\n",
    "    }\n",
    "}\n",
    "\n",
    "STATIC mp_obj_t numerical_function(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args, uint8_t optype) {\n",
    "    static const mp_arg_t allowed_args[] = {\n",
    "        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_PTR(&mp_const_none_obj)} } ,\n",
    "        { MP_QSTR_axis, MP_ARG_OBJ, {.u_rom_obj = MP_ROM_PTR(&mp_const_none_obj)} },\n",
    "    };\n",
    "\n",
    "    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];\n",
    "    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);\n",
    "    \n",
    "    mp_obj_t oin = args[0].u_obj;\n",
    "    mp_obj_t axis = args[1].u_obj;\n",
    "    \n",
    "    if(MP_OBJ_IS_TYPE(oin, &mp_type_tuple) || MP_OBJ_IS_TYPE(oin, &mp_type_list) || \n",
    "        MP_OBJ_IS_TYPE(oin, &mp_type_range)) {\n",
    "        switch(optype) {\n",
    "            case NUMERICAL_MIN:\n",
    "            case NUMERICAL_ARGMIN:\n",
    "            case NUMERICAL_MAX:\n",
    "            case NUMERICAL_ARGMAX:\n",
    "                return numerical_argmin_argmax_iterable(oin, optype);\n",
    "            case NUMERICAL_SUM:\n",
    "            case NUMERICAL_MEAN:\n",
    "                return numerical_sum_mean_std_iterable(oin, optype, 0);\n",
    "            default: // we should never end up here\n",
    "                return mp_const_none;\n",
    "        }\n",
    "    } else if(MP_OBJ_IS_TYPE(oin, &ulab_ndarray_type)) {\n",
    "        ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(oin);\n",
    "        switch(optype) {\n",
    "            case NUMERICAL_MIN:\n",
    "            case NUMERICAL_ARGMIN:\n",
    "            case NUMERICAL_MAX:\n",
    "            case NUMERICAL_ARGMAX:\n",
    "                return numerical_argmin_argmax_ndarray(ndarray, axis, optype);\n",
    "            case NUMERICAL_SUM:\n",
    "            case NUMERICAL_MEAN:\n",
    "                return numerical_sum_mean_ndarray(ndarray, args[1].u_obj, optype);\n",
    "            default:\n",
    "                return mp_const_none;\n",
    "        }\n",
    "    } else {\n",
    "        mp_raise_TypeError(\"input must be tuple, list, range, or ndarray\");\n",
    "    }\n",
    "    return mp_const_none;\n",
    "}\n",
    "\n",
    "mp_obj_t numerical_sum(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {\n",
    "    return numerical_function(n_args, pos_args, kw_args, NUMERICAL_SUM);\n",
    "}\n",
    "\n",
    "mp_obj_t numerical_mean(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {\n",
    "    return numerical_function(n_args, pos_args, kw_args, NUMERICAL_MEAN);\n",
    "}\n",
    "\n",
    "mp_obj_t numerical_std(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {\n",
    "    static const mp_arg_t allowed_args[] = {\n",
    "        { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_PTR(&mp_const_none_obj)} } ,\n",
    "        { MP_QSTR_axis, MP_ARG_OBJ, {.u_rom_obj = MP_ROM_PTR(&mp_const_none_obj)} },\n",
    "        { MP_QSTR_ddof, MP_ARG_KW_ONLY | MP_ARG_INT, {.u_int = 0} },\n",
    "    };\n",
    "\n",
    "    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];\n",
    "    mp_arg_parse_all(n_args, pos_args, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);\n",
    "    \n",
    "    mp_obj_t oin = args[0].u_obj;\n",
    "    mp_obj_t axis = args[1].u_obj;\n",
    "    size_t ddof = args[2].u_int;\n",
    "    if(MP_OBJ_IS_TYPE(oin, &mp_type_tuple) || MP_OBJ_IS_TYPE(oin, &mp_type_list) || MP_OBJ_IS_TYPE(oin, &mp_type_range)) {\n",
    "        return numerical_sum_mean_std_iterable(oin, NUMERICAL_STD, ddof);\n",
    "    } else if(MP_OBJ_IS_TYPE(oin, &ulab_ndarray_type)) {\n",
    "        ndarray_obj_t *ndarray = MP_OBJ_TO_PTR(oin);\n",
    "        if(axis == mp_const_none) { // calculate for the flat array\n",
    "            return numerical_flat_sum_mean_std(ndarray, NUMERICAL_STD, ddof);\n",
    "        } else {\n",
    "            int8_t ax = mp_obj_get_int(axis);\n",
    "            ndarray_header_obj_t header = contracted_shape_strides(ndarray, ax);\n",
    "            ndarray_obj_t *result = ndarray_new_ndarray(ndarray->ndim-1, header.shape, header.strides, NDARRAY_FLOAT);\n",
    "            mp_float_t *farray = (mp_float_t *)result->array->items, sum_sq;\n",
    "            size_t offset;\n",
    "            // iterate along the length of the output array, so as to avoid recursion\n",
    "            for(size_t i=0; i < result->array->len; i++) {\n",
    "                offset = ndarray_index_from_contracted(i, ndarray, result->strides, result->ndim, header.axis) + ndarray->offset;\n",
    "                if(ndarray->array->typecode == NDARRAY_UINT8) {\n",
    "                    CALCULATE_STD(ndarray, uint8_t, sum_sq, ndarray->shape[header.axis], ndarray->strides[header.axis], offset);\n",
    "                } else if(ndarray->array->typecode == NDARRAY_INT8) {\n",
    "                    CALCULATE_STD(ndarray, int8_t, sum_sq, ndarray->shape[header.axis], ndarray->strides[header.axis], offset); \n",
    "                } else if(ndarray->array->typecode == NDARRAY_UINT16) {\n",
    "                    CALCULATE_STD(ndarray, uint16_t, sum_sq, ndarray->shape[header.axis], ndarray->strides[header.axis], offset);     \n",
    "                } else if(ndarray->array->typecode == NDARRAY_INT16) {\n",
    "                    CALCULATE_STD(ndarray, int16_t, sum_sq, ndarray->shape[header.axis], ndarray->strides[header.axis], offset);\n",
    "                } else {\n",
    "                    CALCULATE_STD(ndarray, mp_float_t, sum_sq, ndarray->shape[header.axis], ndarray->strides[header.axis], offset);\n",
    "                }\n",
    "                farray[i] = MICROPY_FLOAT_C_FUN(sqrt)(sum_sq / (ndarray->shape[header.axis] - ddof));\n",
    "            }\n",
    "            return MP_OBJ_FROM_PTR(result);\n",
    "        }\n",
    "        mp_raise_TypeError(\"input must be tuple, list, range, or ndarray\");\n",
    "    }\n",
    "    return mp_const_none;\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# FFT"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## fft.h"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-04T05:54:26.719541Z",
     "start_time": "2019-12-04T05:54:26.715764Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "written 492 bytes to fft.h\n"
     ]
    }
   ],
   "source": [
    "%%ccode fft.h\n",
    "\n",
    "#ifndef _FFT_\n",
    "#define _FFT_\n",
    "\n",
    "#ifndef MP_PI\n",
    "#define MP_PI MICROPY_FLOAT_CONST(3.14159265358979323846)\n",
    "#endif\n",
    "\n",
    "#define SWAP(t, a, b) { t tmp = a; a = b; b = tmp; }\n",
    "\n",
    "mp_obj_t fft_fft(size_t , const mp_obj_t *);\n",
    "mp_obj_t fft_ifft(size_t , const mp_obj_t *);\n",
    "mp_obj_t fft_spectrum(size_t , const mp_obj_t *);\n",
    "\n",
    "#endif"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## fft.c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-04T05:59:01.645829Z",
     "start_time": "2019-12-04T05:59:01.630874Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "written 5056 bytes to fft.c\n"
     ]
    }
   ],
   "source": [
    "%%ccode fft.c\n",
    "\n",
    "#include <math.h>\n",
    "#include <stdio.h>\n",
    "#include <stdlib.h>\n",
    "#include <string.h>\n",
    "#include \"py/runtime.h\"\n",
    "#include \"py/binary.h\"\n",
    "#include \"py/obj.h\"\n",
    "#include \"py/objarray.h\"\n",
    "#include \"ndarray.h\"\n",
    "#include \"fft.h\"\n",
    "\n",
    "enum FFT_TYPE {\n",
    "    FFT_FFT,\n",
    "    FFT_IFFT,\n",
    "    FFT_SPECTRUM,\n",
    "};\n",
    "\n",
    "void fft_kernel(mp_float_t *real, mp_float_t *imag, int n, int isign) {\n",
    "    // This is basically a modification of four1 from Numerical Recipes\n",
    "    // The main difference is that this function takes two arrays, one \n",
    "    // for the real, and one for the imaginary parts. \n",
    "    int j, m, mmax, istep;\n",
    "    mp_float_t tempr, tempi;\n",
    "    mp_float_t wtemp, wr, wpr, wpi, wi, theta;\n",
    "\n",
    "    j = 0;\n",
    "    for(int i = 0; i < n; i++) {\n",
    "        if (j > i) {\n",
    "            SWAP(mp_float_t, real[i], real[j]);\n",
    "            SWAP(mp_float_t, imag[i], imag[j]);\n",
    "        }\n",
    "        m = n >> 1;\n",
    "        while (j >= m && m > 0) {\n",
    "            j -= m;\n",
    "            m >>= 1;\n",
    "        }\n",
    "        j += m;\n",
    "    }\n",
    "\n",
    "    mmax = 1;\n",
    "    while (n > mmax) {\n",
    "        istep = mmax << 1;\n",
    "        theta = -2.0*isign*MP_PI/istep;\n",
    "        wtemp = MICROPY_FLOAT_C_FUN(sin)(0.5 * theta);\n",
    "        wpr = -2.0 * wtemp * wtemp;\n",
    "        wpi = MICROPY_FLOAT_C_FUN(sin)(theta);\n",
    "        wr = 1.0;\n",
    "        wi = 0.0;\n",
    "        for(m = 0; m < mmax; m++) {\n",
    "            for(int i = m; i < n; i += istep) {\n",
    "                j = i + mmax;\n",
    "                tempr = wr * real[j] - wi * imag[j];\n",
    "                tempi = wr * imag[j] + wi * real[j];\n",
    "                real[j] = real[i] - tempr;\n",
    "                imag[j] = imag[i] - tempi;\n",
    "                real[i] += tempr;\n",
    "                imag[i] += tempi;\n",
    "            }\n",
    "            wtemp = wr;\n",
    "            wr = wr*wpr - wi*wpi + wr;\n",
    "            wi = wi*wpr + wtemp*wpi + wi;\n",
    "        }\n",
    "        mmax = istep;\n",
    "    }\n",
    "}\n",
    "\n",
    "mp_obj_t fft_fft_ifft_spectrum(size_t n_args, mp_obj_t arg_re, mp_obj_t arg_im, uint8_t type) {\n",
    "    if(!MP_OBJ_IS_TYPE(arg_re, &ulab_ndarray_type)) {\n",
    "        mp_raise_NotImplementedError(\"FFT is defined for ndarrays only\");\n",
    "    } \n",
    "    if(n_args == 2) {\n",
    "        if(!MP_OBJ_IS_TYPE(arg_im, &ulab_ndarray_type)) {\n",
    "            mp_raise_NotImplementedError(\"FFT is defined for ndarrays only\");\n",
    "        }\n",
    "    }\n",
    "    // Check if input is of length of power of 2\n",
    "    ndarray_obj_t *re = MP_OBJ_TO_PTR(arg_re);\n",
    "    uint16_t len = re->array->len;\n",
    "    if((len & (len-1)) != 0) {\n",
    "        // TODO: pad the input vector, if the length is not a power of 2\n",
    "        mp_raise_ValueError(\"input array length must be power of 2\");\n",
    "    }\n",
    "    \n",
    "    ndarray_obj_t *ndarray_re = ndarray_new_linear_array(len, NDARRAY_FLOAT);\n",
    "    mp_float_t *data_re = (mp_float_t *)ndarray_re->array->items;\n",
    "    \n",
    "    for(size_t i=0; i < len; i++) {\n",
    "        data_re[i] = ndarray_get_float_value(re->array->items, re->array->typecode, re->offset+i*re->strides[0]);\n",
    "    }\n",
    "    ndarray_obj_t *ndarray_im = ndarray_new_linear_array(len, NDARRAY_FLOAT);\n",
    "    mp_float_t *data_im = (mp_float_t *)ndarray_im->array->items;\n",
    "\n",
    "    if(n_args == 2) {\n",
    "        ndarray_obj_t *im = MP_OBJ_TO_PTR(arg_im);\n",
    "        if (re->len != im->len) {\n",
    "            mp_raise_ValueError(\"real and imaginary parts must be of equal length\");\n",
    "        }\n",
    "        for(size_t i=0; i < len; i++) {\n",
    "            data_im[i] = ndarray_get_float_value(im->array->items, im->array->typecode, im->offset+i*im->strides[0]);\n",
    "        }\n",
    "    }\n",
    "    if((type == FFT_FFT) || (type == FFT_SPECTRUM)) {\n",
    "        fft_kernel(data_re, data_im, len, 1);\n",
    "        if(type == FFT_SPECTRUM) {\n",
    "            for(size_t i=0; i < len; i++) {\n",
    "                data_re[i] = MICROPY_FLOAT_C_FUN(sqrt)(data_re[i]*data_re[i] + data_im[i]*data_im[i]);\n",
    "            }\n",
    "        }\n",
    "    } else { // inverse transform\n",
    "        fft_kernel(data_re, data_im, len, -1);\n",
    "        // TODO: numpy accepts the norm keyword argument\n",
    "        for(size_t i=0; i < len; i++) {\n",
    "            data_re[i] /= len;\n",
    "            data_im[i] /= len;\n",
    "        }\n",
    "    }\n",
    "    if(type == FFT_SPECTRUM) {\n",
    "        return MP_OBJ_TO_PTR(ndarray_re);\n",
    "    } else {\n",
    "        mp_obj_t tuple[2];\n",
    "        tuple[0] = ndarray_re;\n",
    "        tuple[1] = ndarray_im;\n",
    "        return mp_obj_new_tuple(2, tuple);\n",
    "    }\n",
    "}\n",
    "\n",
    "mp_obj_t fft_fft(size_t n_args, const mp_obj_t *args) {\n",
    "    if(n_args == 2) {\n",
    "        return fft_fft_ifft_spectrum(n_args, args[0], args[1], FFT_FFT);\n",
    "    } else {\n",
    "        return fft_fft_ifft_spectrum(n_args, args[0], mp_const_none, FFT_FFT);        \n",
    "    }\n",
    "}\n",
    "\n",
    "mp_obj_t fft_ifft(size_t n_args, const mp_obj_t *args) {\n",
    "    if(n_args == 2) {\n",
    "        return fft_fft_ifft_spectrum(n_args, args[0], args[1], FFT_IFFT);\n",
    "    } else {\n",
    "        return fft_fft_ifft_spectrum(n_args, args[0], mp_const_none, FFT_IFFT);\n",
    "    }\n",
    "}\n",
    "\n",
    "mp_obj_t fft_spectrum(size_t n_args, const mp_obj_t *args) {\n",
    "    if(n_args == 2) {\n",
    "        return fft_fft_ifft_spectrum(n_args, args[0], args[1], FFT_SPECTRUM);\n",
    "    } else {\n",
    "        return fft_fft_ifft_spectrum(n_args, args[0], mp_const_none, FFT_SPECTRUM);\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ulab module\n",
    "\n",
    "This module simply brings all components together, and does not contain new function definitions."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ulab.c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-04T06:08:54.358296Z",
     "start_time": "2019-12-04T06:08:54.353537Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "written 8254 bytes to ulab.c\n"
     ]
    }
   ],
   "source": [
    "%%ccode ulab.c\n",
    "\n",
    "#include <math.h>\n",
    "#include <stdio.h>\n",
    "#include <stdlib.h>\n",
    "#include <string.h>\n",
    "#include \"py/runtime.h\"\n",
    "#include \"py/binary.h\"\n",
    "#include \"py/obj.h\"\n",
    "#include \"py/objarray.h\"\n",
    "\n",
    "#include \"ndarray.h\"\n",
    "#include \"vectorise.h\"\n",
    "#include \"linalg.h\"\n",
    "#include \"poly.h\"\n",
    "#include \"numerical.h\"\n",
    "#include \"fft.h\"\n",
    "\n",
    "#define ULAB_VERSION 1.0\n",
    "\n",
    "typedef struct _mp_obj_float_t {\n",
    "    mp_obj_base_t base;\n",
    "    mp_float_t value;\n",
    "} mp_obj_float_t;\n",
    "\n",
    "mp_obj_float_t ulab_version = {{&mp_type_float}, ULAB_VERSION};\n",
    "\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(ndarray_shape_obj, ndarray_shape);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(ndarray_strides_obj, ndarray_strides);\n",
    "MP_DEFINE_CONST_FUN_OBJ_2(ndarray_reshape_obj, ndarray_reshape);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(ndarray_transpose_obj, ndarray_transpose);\n",
    "MP_DEFINE_CONST_FUN_OBJ_KW(ndarray_flatten_obj, 1, ndarray_flatten);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(ndarray_itemsize_obj, ndarray_itemsize);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(ndarray_info_obj, ndarray_info);\n",
    "\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_acos_obj, vectorise_acos);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_acosh_obj, vectorise_acosh);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_asin_obj, vectorise_asin);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_asinh_obj, vectorise_asinh);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_atan_obj, vectorise_atan);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_atanh_obj, vectorise_atanh);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_ceil_obj, vectorise_ceil);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_cos_obj, vectorise_cos);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_erf_obj, vectorise_erf);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_erfc_obj, vectorise_erfc);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_exp_obj, vectorise_exp);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_expm1_obj, vectorise_expm1);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_floor_obj, vectorise_floor);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_gamma_obj, vectorise_gamma);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_lgamma_obj, vectorise_lgamma);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_log_obj, vectorise_log);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_log10_obj, vectorise_log10);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_log2_obj, vectorise_log2);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_sin_obj, vectorise_sin);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_sinh_obj, vectorise_sinh);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_sqrt_obj, vectorise_sqrt);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_tan_obj, vectorise_tan);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(vectorise_tanh_obj, vectorise_tanh);\n",
    "\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(linalg_inv_obj, linalg_inv);\n",
    "MP_DEFINE_CONST_FUN_OBJ_2(linalg_dot_obj, linalg_dot);\n",
    "MP_DEFINE_CONST_FUN_OBJ_KW(linalg_zeros_obj, 0, linalg_zeros);\n",
    "MP_DEFINE_CONST_FUN_OBJ_KW(linalg_ones_obj, 0, linalg_ones);\n",
    "MP_DEFINE_CONST_FUN_OBJ_KW(linalg_eye_obj, 0, linalg_eye);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(linalg_det_obj, linalg_det);\n",
    "MP_DEFINE_CONST_FUN_OBJ_1(linalg_eig_obj, linalg_eig);\n",
    "\n",
    "MP_DEFINE_CONST_FUN_OBJ_2(poly_polyval_obj, poly_polyval);\n",
    "MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(poly_polyfit_obj, 2, 3, poly_polyfit);\n",
    "\n",
    "MP_DEFINE_CONST_FUN_OBJ_KW(numerical_linspace_obj, 2, numerical_linspace);\n",
    "MP_DEFINE_CONST_FUN_OBJ_KW(numerical_sum_obj, 1, numerical_sum);\n",
    "MP_DEFINE_CONST_FUN_OBJ_KW(numerical_mean_obj, 1, numerical_mean);\n",
    "MP_DEFINE_CONST_FUN_OBJ_KW(numerical_std_obj, 1, numerical_std);\n",
    "\n",
    "\n",
    "MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(fft_fft_obj, 1, 2, fft_fft);\n",
    "MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(fft_ifft_obj, 1, 2, fft_ifft);\n",
    "MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(fft_spectrum_obj, 1, 2, fft_spectrum);\n",
    "\n",
    "STATIC const mp_rom_map_elem_t ulab_ndarray_locals_dict_table[] = {\n",
    "    { MP_ROM_QSTR(MP_QSTR_shape), MP_ROM_PTR(&ndarray_shape_obj) },\n",
    "    { MP_ROM_QSTR(MP_QSTR_strides), MP_ROM_PTR(&ndarray_strides_obj) },\n",
    "    { MP_ROM_QSTR(MP_QSTR_reshape), MP_ROM_PTR(&ndarray_reshape_obj) },\n",
    "    { MP_ROM_QSTR(MP_QSTR_transpose), MP_ROM_PTR(&ndarray_transpose_obj) },\n",
    "    { MP_ROM_QSTR(MP_QSTR_flatten), MP_ROM_PTR(&ndarray_flatten_obj) },\n",
    "    { MP_ROM_QSTR(MP_QSTR_itemsize), MP_ROM_PTR(&ndarray_itemsize_obj) },\n",
    "};\n",
    "\n",
    "STATIC MP_DEFINE_CONST_DICT(ulab_ndarray_locals_dict, ulab_ndarray_locals_dict_table);\n",
    "\n",
    "const mp_obj_type_t ulab_ndarray_type = {\n",
    "    { &mp_type_type },\n",
    "    .name = MP_QSTR_ndarray,\n",
    "    .print = ndarray_print,\n",
    "    .make_new = ndarray_make_new,\n",
    "    .subscr = ndarray_subscr,\n",
    "    .getiter = ndarray_getiter,\n",
    "    .unary_op = ndarray_unary_op,\n",
    "    .binary_op = ndarray_binary_op,\n",
    "    .locals_dict = (mp_obj_dict_t*)&ulab_ndarray_locals_dict,\n",
    "};\n",
    "\n",
    "STATIC const mp_map_elem_t ulab_globals_table[] = {\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_OBJ_NEW_QSTR(MP_QSTR_ulab) },\n",
    "    { MP_ROM_QSTR(MP_QSTR___version__), MP_ROM_PTR(&ulab_version) },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_array), (mp_obj_t)&ulab_ndarray_type },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_ndinfo), (mp_obj_t)&ndarray_info_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_acos), (mp_obj_t)&vectorise_acos_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_acosh), (mp_obj_t)&vectorise_acosh_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_asin), (mp_obj_t)&vectorise_asin_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_asinh), (mp_obj_t)&vectorise_asinh_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_atan), (mp_obj_t)&vectorise_atan_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_atanh), (mp_obj_t)&vectorise_atanh_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_ceil), (mp_obj_t)&vectorise_ceil_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_cos), (mp_obj_t)&vectorise_cos_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_erf), (mp_obj_t)&vectorise_erf_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_erfc), (mp_obj_t)&vectorise_erfc_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_exp), (mp_obj_t)&vectorise_exp_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_expm1), (mp_obj_t)&vectorise_expm1_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_floor), (mp_obj_t)&vectorise_floor_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_gamma), (mp_obj_t)&vectorise_gamma_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_lgamma), (mp_obj_t)&vectorise_lgamma_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_log), (mp_obj_t)&vectorise_log_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_log10), (mp_obj_t)&vectorise_log10_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_log2), (mp_obj_t)&vectorise_log2_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_sin), (mp_obj_t)&vectorise_sin_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_sinh), (mp_obj_t)&vectorise_sinh_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_sqrt), (mp_obj_t)&vectorise_sqrt_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_tan), (mp_obj_t)&vectorise_tan_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_tanh), (mp_obj_t)&vectorise_tanh_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_polyval), (mp_obj_t)&poly_polyval_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_polyfit), (mp_obj_t)&poly_polyfit_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_inv), (mp_obj_t)&linalg_inv_obj },\n",
    "    { MP_ROM_QSTR(MP_QSTR_dot), (mp_obj_t)&linalg_dot_obj },\n",
    "    { MP_ROM_QSTR(MP_QSTR_zeros), (mp_obj_t)&linalg_zeros_obj },\n",
    "    { MP_ROM_QSTR(MP_QSTR_ones), (mp_obj_t)&linalg_ones_obj },\n",
    "    { MP_ROM_QSTR(MP_QSTR_eye), (mp_obj_t)&linalg_eye_obj },\n",
    "    { MP_ROM_QSTR(MP_QSTR_det), (mp_obj_t)&linalg_det_obj },\n",
    "    { MP_ROM_QSTR(MP_QSTR_eig), (mp_obj_t)&linalg_eig_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_linspace), (mp_obj_t)&numerical_linspace_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_sum), (mp_obj_t)&numerical_sum_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_mean), (mp_obj_t)&numerical_mean_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_std), (mp_obj_t)&numerical_std_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_fft), (mp_obj_t)&fft_fft_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_ifft), (mp_obj_t)&fft_ifft_obj },\n",
    "    { MP_OBJ_NEW_QSTR(MP_QSTR_spectrum), (mp_obj_t)&fft_spectrum_obj },\n",
    "    // class constants\n",
    "    { MP_ROM_QSTR(MP_QSTR_bool), MP_ROM_INT(NDARRAY_BOOL) },\n",
    "    { MP_ROM_QSTR(MP_QSTR_uint8), MP_ROM_INT(NDARRAY_UINT8) },\n",
    "    { MP_ROM_QSTR(MP_QSTR_int8), MP_ROM_INT(NDARRAY_INT8) },\n",
    "    { MP_ROM_QSTR(MP_QSTR_uint16), MP_ROM_INT(NDARRAY_UINT16) },\n",
    "    { MP_ROM_QSTR(MP_QSTR_int16), MP_ROM_INT(NDARRAY_INT16) },\n",
    "    { MP_ROM_QSTR(MP_QSTR_float), MP_ROM_INT(NDARRAY_FLOAT) },\n",
    "};\n",
    "\n",
    "STATIC MP_DEFINE_CONST_DICT (\n",
    "    mp_module_ulab_globals,\n",
    "    ulab_globals_table\n",
    ");\n",
    "\n",
    "const mp_obj_module_t ulab_user_cmodule = {\n",
    "    .base = { &mp_type_module },\n",
    "    .globals = (mp_obj_dict_t*)&mp_module_ulab_globals,\n",
    "};\n",
    "\n",
    "MP_REGISTER_MODULE(MP_QSTR_ulab, ulab_user_cmodule, MODULE_ULAB_ENABLED);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## makefile"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-03T22:06:59.277223Z",
     "start_time": "2019-12-03T22:06:59.271110Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting ../../../ulab/code/micropython.mk\n"
     ]
    }
   ],
   "source": [
    "%%writefile ../../../ulab/code/micropython.mk\n",
    "\n",
    "USERMODULES_DIR := $(USERMOD_DIR)\n",
    "\n",
    "# Add all C files to SRC_USERMOD.\n",
    "SRC_USERMOD += $(USERMODULES_DIR)/ndarray.c\n",
    "SRC_USERMOD += $(USERMODULES_DIR)/vectorise.c\n",
    "SRC_USERMOD += $(USERMODULES_DIR)/linalg.c\n",
    "SRC_USERMOD += $(USERMODULES_DIR)/poly.c\n",
    "SRC_USERMOD += $(USERMODULES_DIR)/numerical.c\n",
    "SRC_USERMOD += $(USERMODULES_DIR)/fft.c\n",
    "SRC_USERMOD += $(USERMODULES_DIR)/ulab.c\n",
    "\n",
    "CFLAGS_USERMOD += -I$(USERMODULES_DIR)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## make"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### unix port"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-11-14T05:53:50.264602Z",
     "start_time": "2019-11-14T05:53:50.259771Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/home/v923z/sandbox/micropython/v1.11/micropython/ports/unix\n"
     ]
    }
   ],
   "source": [
    "%cd ../../../micropython/ports/unix/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 457,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-12T21:30:43.796909Z",
     "start_time": "2019-12-12T21:30:43.464851Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Use make V=1 or set BUILD_VERBOSE in your environment to increase build verbosity.\n",
      "rm -f micropython\n",
      "rm -f micropython.map\n",
      "rm -rf build \n"
     ]
    }
   ],
   "source": [
    "!make clean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 458,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-12T21:31:13.571392Z",
     "start_time": "2019-12-12T21:30:47.325649Z"
    },
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Use make V=1 or set BUILD_VERBOSE in your environment to increase build verbosity.\n",
      "Including User C Module from ../../../ulab/code\n",
      "mkdir -p build/genhdr\n",
      "GEN build/genhdr/mpversion.h\n",
      "GEN build/genhdr/moduledefs.h\n",
      "GEN build/genhdr/qstr.i.last\n",
      "GEN build/genhdr/qstr.split\n",
      "GEN build/genhdr/qstrdefs.collected.h\n",
      "QSTR updated\n",
      "GEN build/genhdr/qstrdefs.generated.h\n",
      "mkdir -p build/build/\n",
      "mkdir -p build/code/\n",
      "mkdir -p build/extmod/\n",
      "mkdir -p build/lib/axtls/crypto/\n",
      "mkdir -p build/lib/axtls/ssl/\n",
      "mkdir -p build/lib/berkeley-db-1.xx/btree/\n",
      "mkdir -p build/lib/berkeley-db-1.xx/mpool/\n",
      "mkdir -p build/lib/embed/\n",
      "mkdir -p build/lib/mp-readline/\n",
      "mkdir -p build/lib/oofatfs/\n",
      "mkdir -p build/lib/timeutils/\n",
      "mkdir -p build/lib/utils/\n",
      "mkdir -p build/py/\n",
      "CC ../../py/mpstate.c\n",
      "CC ../../py/nlr.c\n",
      "CC ../../py/nlrx86.c\n",
      "CC ../../py/nlrx64.c\n",
      "CC ../../py/nlrthumb.c\n",
      "CC ../../py/nlrpowerpc.c\n",
      "CC ../../py/nlrxtensa.c\n",
      "CC ../../py/nlrsetjmp.c\n",
      "CC ../../py/malloc.c\n",
      "CC ../../py/gc.c\n",
      "CC ../../py/pystack.c\n",
      "CC ../../py/qstr.c\n",
      "CC ../../py/vstr.c\n",
      "CC ../../py/mpprint.c\n",
      "CC ../../py/unicode.c\n",
      "CC ../../py/mpz.c\n",
      "CC ../../py/reader.c\n",
      "CC ../../py/lexer.c\n",
      "CC ../../py/parse.c\n",
      "CC ../../py/scope.c\n",
      "CC ../../py/compile.c\n",
      "CC ../../py/emitcommon.c\n",
      "CC ../../py/emitbc.c\n",
      "CC ../../py/asmbase.c\n",
      "CC ../../py/asmx64.c\n",
      "CC ../../py/emitnx64.c\n",
      "CC ../../py/asmx86.c\n",
      "CC ../../py/emitnx86.c\n",
      "CC ../../py/asmthumb.c\n",
      "CC ../../py/emitnthumb.c\n",
      "CC ../../py/emitinlinethumb.c\n",
      "CC ../../py/asmarm.c\n",
      "CC ../../py/emitnarm.c\n",
      "CC ../../py/asmxtensa.c\n",
      "CC ../../py/emitnxtensa.c\n",
      "CC ../../py/emitinlinextensa.c\n",
      "CC ../../py/emitnxtensawin.c\n",
      "CC ../../py/formatfloat.c\n",
      "CC ../../py/parsenumbase.c\n",
      "CC ../../py/parsenum.c\n",
      "CC ../../py/emitglue.c\n",
      "CC ../../py/persistentcode.c\n",
      "CC ../../py/runtime.c\n",
      "CC ../../py/runtime_utils.c\n",
      "CC ../../py/scheduler.c\n",
      "CC ../../py/nativeglue.c\n",
      "CC ../../py/ringbuf.c\n",
      "CC ../../py/stackctrl.c\n",
      "CC ../../py/argcheck.c\n",
      "CC ../../py/warning.c\n",
      "CC ../../py/profile.c\n",
      "CC ../../py/map.c\n",
      "CC ../../py/obj.c\n",
      "CC ../../py/objarray.c\n",
      "CC ../../py/objattrtuple.c\n",
      "CC ../../py/objbool.c\n",
      "CC ../../py/objboundmeth.c\n",
      "CC ../../py/objcell.c\n",
      "CC ../../py/objclosure.c\n",
      "CC ../../py/objcomplex.c\n",
      "CC ../../py/objdeque.c\n",
      "CC ../../py/objdict.c\n",
      "CC ../../py/objenumerate.c\n",
      "CC ../../py/objexcept.c\n",
      "CC ../../py/objfilter.c\n",
      "CC ../../py/objfloat.c\n",
      "CC ../../py/objfun.c\n",
      "CC ../../py/objgenerator.c\n",
      "CC ../../py/objgetitemiter.c\n",
      "CC ../../py/objint.c\n",
      "CC ../../py/objint_longlong.c\n",
      "CC ../../py/objint_mpz.c\n",
      "CC ../../py/objlist.c\n",
      "CC ../../py/objmap.c\n",
      "CC ../../py/objmodule.c\n",
      "CC ../../py/objobject.c\n",
      "CC ../../py/objpolyiter.c\n",
      "CC ../../py/objproperty.c\n",
      "CC ../../py/objnone.c\n",
      "CC ../../py/objnamedtuple.c\n",
      "CC ../../py/objrange.c\n",
      "CC ../../py/objreversed.c\n",
      "CC ../../py/objset.c\n",
      "CC ../../py/objsingleton.c\n",
      "CC ../../py/objslice.c\n",
      "CC ../../py/objstr.c\n",
      "CC ../../py/objstrunicode.c\n",
      "CC ../../py/objstringio.c\n",
      "CC ../../py/objtuple.c\n",
      "CC ../../py/objtype.c\n",
      "CC ../../py/objzip.c\n",
      "CC ../../py/opmethods.c\n",
      "CC ../../py/sequence.c\n",
      "CC ../../py/stream.c\n",
      "CC ../../py/binary.c\n",
      "CC ../../py/builtinimport.c\n",
      "CC ../../py/builtinevex.c\n",
      "CC ../../py/builtinhelp.c\n",
      "CC ../../py/modarray.c\n",
      "CC ../../py/modbuiltins.c\n",
      "CC ../../py/modcollections.c\n",
      "CC ../../py/modgc.c\n",
      "CC ../../py/modio.c\n",
      "CC ../../py/modmath.c\n",
      "CC ../../py/modcmath.c\n",
      "CC ../../py/modmicropython.c\n",
      "CC ../../py/modstruct.c\n",
      "CC ../../py/modsys.c\n",
      "CC ../../py/moduerrno.c\n",
      "CC ../../py/modthread.c\n",
      "CC ../../py/vm.c\n",
      "CC ../../py/bc.c\n",
      "CC ../../py/showbc.c\n",
      "CC ../../py/repl.c\n",
      "CC ../../py/smallint.c\n",
      "CC ../../py/frozenmod.c\n",
      "CC ../../extmod/moductypes.c\n",
      "CC ../../extmod/modujson.c\n",
      "CC ../../extmod/modure.c\n",
      "CC ../../extmod/moduzlib.c\n",
      "CC ../../extmod/moduheapq.c\n",
      "CC ../../extmod/modutimeq.c\n",
      "CC ../../extmod/moduhashlib.c\n",
      "CC ../../extmod/moducryptolib.c\n",
      "CC ../../extmod/modubinascii.c\n",
      "CC ../../extmod/virtpin.c\n",
      "CC ../../extmod/machine_mem.c\n",
      "CC ../../extmod/machine_pinbase.c\n",
      "CC ../../extmod/machine_signal.c\n",
      "CC ../../extmod/machine_pulse.c\n",
      "CC ../../extmod/machine_i2c.c\n",
      "CC ../../extmod/machine_spi.c\n",
      "CC ../../extmod/modbluetooth.c\n",
      "CC ../../extmod/modussl_axtls.c\n",
      "CC ../../extmod/modussl_mbedtls.c\n",
      "CC ../../extmod/modurandom.c\n",
      "CC ../../extmod/moduselect.c\n",
      "CC ../../extmod/moduwebsocket.c\n",
      "CC ../../extmod/modwebrepl.c\n",
      "CC ../../extmod/modframebuf.c\n",
      "CC ../../extmod/vfs.c\n",
      "CC ../../extmod/vfs_blockdev.c\n",
      "CC ../../extmod/vfs_reader.c\n",
      "CC ../../extmod/vfs_posix.c\n",
      "CC ../../extmod/vfs_posix_file.c\n",
      "CC ../../extmod/vfs_fat.c\n",
      "CC ../../extmod/vfs_fat_diskio.c\n",
      "CC ../../extmod/vfs_fat_file.c\n",
      "CC ../../extmod/vfs_lfs.c\n",
      "CC ../../extmod/utime_mphal.c\n",
      "CC ../../extmod/uos_dupterm.c\n",
      "CC ../../lib/embed/abort_.c\n",
      "CC ../../lib/utils/printf.c\n",
      "MPY upip.py\n",
      "MPY upip_utarfile.py\n",
      "GEN build/frozen_content.c\n",
      "CC build/frozen_content.c\n",
      "CC main.c\n",
      "CC gccollect.c\n",
      "CC unix_mphal.c\n",
      "CC mpthreadport.c\n",
      "CC input.c\n",
      "CC file.c\n",
      "CC modmachine.c\n",
      "CC modos.c\n",
      "CC moduos_vfs.c\n",
      "CC modtime.c\n",
      "CC moduselect.c\n",
      "CC alloc.c\n",
      "CC coverage.c\n",
      "CC fatfs_port.c\n",
      "CC ../../../ulab/code/ndarray.c\n",
      "CC ../../../ulab/code/vectorise.c\n",
      "CC ../../../ulab/code/linalg.c\n",
      "CC ../../../ulab/code/poly.c\n",
      "CC ../../../ulab/code/numerical.c\n",
      "CC ../../../ulab/code/fft.c\n",
      "CC ../../../ulab/code/ulab.c\n",
      "CC ../../lib/axtls/ssl/asn1.c\n",
      "CC ../../lib/axtls/ssl/loader.c\n",
      "CC ../../lib/axtls/ssl/tls1.c\n",
      "CC ../../lib/axtls/ssl/tls1_svr.c\n",
      "CC ../../lib/axtls/ssl/tls1_clnt.c\n",
      "CC ../../lib/axtls/ssl/x509.c\n",
      "CC ../../lib/axtls/crypto/aes.c\n",
      "CC ../../lib/axtls/crypto/bigint.c\n",
      "CC ../../lib/axtls/crypto/crypto_misc.c\n",
      "CC ../../lib/axtls/crypto/hmac.c\n",
      "CC ../../lib/axtls/crypto/md5.c\n",
      "CC ../../lib/axtls/crypto/rsa.c\n",
      "CC ../../lib/axtls/crypto/sha1.c\n",
      "CC ../../extmod/modbtree.c\n",
      "CC ../../lib/berkeley-db-1.xx/btree/bt_close.c\n",
      "CC ../../lib/berkeley-db-1.xx/btree/bt_conv.c\n",
      "CC ../../lib/berkeley-db-1.xx/btree/bt_debug.c\n",
      "CC ../../lib/berkeley-db-1.xx/btree/bt_delete.c\n",
      "CC ../../lib/berkeley-db-1.xx/btree/bt_get.c\n",
      "CC ../../lib/berkeley-db-1.xx/btree/bt_open.c\n",
      "CC ../../lib/berkeley-db-1.xx/btree/bt_overflow.c\n",
      "CC ../../lib/berkeley-db-1.xx/btree/bt_page.c\n",
      "CC ../../lib/berkeley-db-1.xx/btree/bt_put.c\n",
      "CC ../../lib/berkeley-db-1.xx/btree/bt_search.c\n",
      "CC ../../lib/berkeley-db-1.xx/btree/bt_seq.c\n",
      "CC ../../lib/berkeley-db-1.xx/btree/bt_split.c\n",
      "CC ../../lib/berkeley-db-1.xx/btree/bt_utils.c\n",
      "CC ../../lib/berkeley-db-1.xx/mpool/mpool.c\n",
      "CC modtermios.c\n",
      "CC modusocket.c\n",
      "CC modffi.c\n",
      "CC ../../lib/mp-readline/readline.c\n",
      "CC ../../lib/timeutils/timeutils.c\n",
      "CC ../../lib/oofatfs/ff.c\n",
      "CC ../../lib/oofatfs/ffunicode.c\n",
      "LINK micropython\n",
      "   text\t   data\t    bss\t    dec\t    hex\tfilename\n",
      "   2127\t   6553\t      0\t   8680\t   21e8\tbuild/build/frozen_content.o\n",
      " 476087\t  57464\t   2120\t 535671\t  82c77\tmicropython\n"
     ]
    }
   ],
   "source": [
    "!make USER_C_MODULES=../../../ulab all"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "382.766px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  },
  "toc-autonumbering": true,
  "toc-showcode": false,
  "toc-showmarkdowntxt": true,
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}