add dtype keyword to loadtxt

2022-01-29 22:30:11 +01:00 · 2022-01-29 22:30:11 +01:00 · 19282b47d1
commit 19282b47d1
parent 9dc9b776d5
8 changed files with 104 additions and 35 deletions
--- a/code/numpy/io/io.c
+++ b/code/numpy/io/io.c
@ -8,6 +8,7 @@
 * Copyright (c) 2022 Zoltán Vörös
 */

+#include <math.h>
 #include <string.h>

 #include "py/builtin.h"
@ -243,6 +244,7 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
        { MP_QSTR_comments, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = mp_const_none } },
        { MP_QSTR_max_rows, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = -1 } },
        { MP_QSTR_usecols, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = mp_const_none } },
+        { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = NDARRAY_FLOAT } },
    };

    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
@ -304,6 +306,8 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
        }
    }

+    uint8_t dtype = args[5].u_int;
+
    // count the columns and rows
    // we actually count only the rows and the items, and assume that
    // the number of columns can be gotten by means of a simple division,
@ -363,7 +367,7 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw

    #if ULAB_MAX_DIMS == 1
    shape[0] = rows;
-    ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(1, shape, NDARRAY_FLOAT);
+    ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(1, shape, dtype);
    #else
    if(args[4].u_obj == mp_const_none) {
        shape[ULAB_MAX_DIMS - 1] = columns;
@ -371,11 +375,9 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
        shape[ULAB_MAX_DIMS - 1] = used_columns;
    }
    shape[ULAB_MAX_DIMS - 2] = rows;
-    ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(2, shape, NDARRAY_FLOAT);
+    ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(2, shape, dtype);
    #endif

-    mp_float_t *array = (mp_float_t *)ndarray->array;
-
    struct mp_stream_seek_t seek_s;
    seek_s.offset = 0;
    seek_s.whence = MP_SEEK_SET;
@ -388,6 +390,7 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
    rows = 0;
    columns = 0;

+    size_t idx = 0;
    do {
        read = stream_p->read(stream, buffer, ULAB_IO_BUFFER_SIZE - 1, &error);
        buffer[read] = '\0';
@ -421,12 +424,20 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
                #else
                if(args[4].u_obj == mp_const_none) {
                    mp_obj_t value = mp_parse_num_decimal(clipboard, len, false, false, NULL);
-                    *array++ = mp_obj_get_float(value);
+                    if(dtype != NDARRAY_FLOAT) {
+                        mp_float_t _value = mp_obj_get_float(value);
+                        value = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(round)(_value));
+                    }
+                    ndarray_set_value(dtype, ndarray->array, idx++, value);
                } else {
                    for(uint8_t c = 0; c < used_columns; c++) {
                        if(columns == cols[c]) {
                            mp_obj_t value = mp_parse_num_decimal(clipboard, len, false, false, NULL);
-                            *array++ = mp_obj_get_float(value);
+                            if(dtype != NDARRAY_FLOAT) {
+                                mp_float_t _value = mp_obj_get_float(value);
+                                value = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(round)(_value));
+                            }
+                            ndarray_set_value(dtype, ndarray->array, idx++, value);
                            break;
                        }
                    }
--- a/docs/manual/source/conf.py
+++ b/docs/manual/source/conf.py
@ -27,7 +27,7 @@ copyright = '2019-2022, Zoltán Vörös and contributors'
 author = 'Zoltán Vörös'

 # The full version, including alpha/beta/rc tags
-release = '4.4.0'
+release = '4.4.1'


 # -- General configuration ---------------------------------------------------
--- a/docs/manual/source/numpy-functions.rst
+++ b/docs/manual/source/numpy-functions.rst
@ -1027,10 +1027,12 @@ https://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html

 The function reads data from a text file, and returns the generated
 array. It takes a file name as the single positional argument, and the
-``comments`` (with a default value of ``#``), the ``delimiter`` (with a
-default value of ``,``), ``usecols`` (with a default of all columns),
-and ``max_rows`` (with a default of all rows) keyword arguments. The
-array returned is always of type ``float``.
+``dtype`` (with a default value of ``float``), ``comments`` (with a
+default value of ``#``), ``delimiter`` (with a default value of
+``,``), ``usecols`` (with a default of all columns), and
+``max_rows`` (with a default of all rows) keyword arguments. If
+``dtype`` is supplied and is not ``float``, each data entry is rounded
+to the nearest integer and stored in the requested integer type.

 .. code::
        
@ -1040,8 +1042,12 @@ array returned is always of type ``float``.
    
    print('read all data')
    print(np.loadtxt('loadtxt.dat'))
+    
    print('\nread maximum 5 rows (first row is a comment line)')
    print(np.loadtxt('loadtxt.dat', max_rows=5))
+    
+    print('\nread maximum 5 rows, convert dtype')
+    print(np.loadtxt('loadtxt.dat', max_rows=5, dtype=np.uint8))

 .. parsed-literal::

@ -1052,7 +1058,9 @@ array returned is always of type ``float``.
           [12.0, 13.0, 14.0, 15.0],
           [16.0, 17.0, 18.0, 19.0],
           [20.0, 21.0, 22.0, 23.0],
-           [24.0, 25.0, 26.0, 27.0]], dtype=float64)
+           [24.0, 25.0, 26.0, 27.0],
+           [28.00000000000001, 29.0, 30.0, 31.0],
+           [32.0, 33.0, 34.00000000000001, 35.0]], dtype=float64)
    
    read maximum 5 rows (first row is a comment line)
    array([[0.0, 1.0, 2.0, 3.0],
@ -1060,6 +1068,12 @@ array returned is always of type ``float``.
           [8.0, 9.0, 10.0, 11.0],
           [12.0, 13.0, 14.0, 15.0]], dtype=float64)
    
+    read maximum 5 rows, convert dtype
+    array([[0, 1, 2, 3],
+           [4, 5, 6, 7],
+           [8, 9, 10, 11],
+           [12, 13, 14, 15]], dtype=uint8)
+    
    


--- a/docs/numpy-functions.ipynb
+++ b/docs/numpy-functions.ipynb
@ -31,11 +31,11 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
-     "end_time": "2022-01-28T18:34:31.017702Z",
-     "start_time": "2022-01-28T18:34:31.010354Z"
+     "end_time": "2022-01-29T21:24:54.931042Z",
+     "start_time": "2022-01-29T21:24:54.927243Z"
    }
   },
   "outputs": [],
@ -49,11 +49,11 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
-     "end_time": "2022-01-28T18:34:31.565147Z",
-     "start_time": "2022-01-28T18:34:31.550395Z"
+     "end_time": "2022-01-29T21:24:55.649634Z",
+     "start_time": "2022-01-29T21:24:55.626921Z"
    }
   },
   "outputs": [],
@ -1474,16 +1474,16 @@
    "\n",
    "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html\n",
    "\n",
-    "The function reads data from a text file, and returns the generated array. It takes a file name as the single positional argument, and the `comments` (with a default value of `#`), the `delimiter` (with a default value of `,`), `usecols` (with a default of all columns), and `max_rows` (with a default of all rows) keyword arguments. The array returned is always of type `float`."
+    "The function reads data from a text file, and returns the generated array. It takes a file name as the single positional argument, and the `dtype` (with a default value of `float`), `comments` (with a default value of `#`), `delimiter` (with a default value of `,`), `usecols` (with a default of all columns), and `max_rows` (with a default of all rows) keyword arguments. If `dtype` is supplied and is not `float`, each data entry is rounded to the nearest integer and stored in the requested integer type."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
-     "end_time": "2022-01-28T18:47:52.346814Z",
-     "start_time": "2022-01-28T18:47:52.291552Z"
+     "end_time": "2022-01-29T21:26:36.258135Z",
+     "start_time": "2022-01-29T21:26:36.236256Z"
    }
   },
   "outputs": [
@ -1498,7 +1498,9 @@
      "       [12.0, 13.0, 14.0, 15.0],\n",
      "       [16.0, 17.0, 18.0, 19.0],\n",
      "       [20.0, 21.0, 22.0, 23.0],\n",
-      "       [24.0, 25.0, 26.0, 27.0]], dtype=float64)\n",
+      "       [24.0, 25.0, 26.0, 27.0],\n",
+      "       [28.00000000000001, 29.0, 30.0, 31.0],\n",
+      "       [32.0, 33.0, 34.00000000000001, 35.0]], dtype=float64)\n",
      "\n",
      "read maximum 5 rows (first row is a comment line)\n",
      "array([[0.0, 1.0, 2.0, 3.0],\n",
@ -1506,6 +1508,12 @@
      "       [8.0, 9.0, 10.0, 11.0],\n",
      "       [12.0, 13.0, 14.0, 15.0]], dtype=float64)\n",
      "\n",
+      "read maximum 5 rows, convert dtype\n",
+      "array([[0, 1, 2, 3],\n",
+      "       [4, 5, 6, 7],\n",
+      "       [8, 9, 10, 11],\n",
+      "       [12, 13, 14, 15]], dtype=uint8)\n",
+      "\n",
      "\n"
     ]
    }
@ -1517,8 +1525,12 @@
    "\n",
    "print('read all data')\n",
    "print(np.loadtxt('loadtxt.dat'))\n",
+    "\n",
    "print('\\nread maximum 5 rows (first row is a comment line)')\n",
-    "print(np.loadtxt('loadtxt.dat', max_rows=5))"
+    "print(np.loadtxt('loadtxt.dat', max_rows=5))\n",
+    "\n",
+    "print('\\nread maximum 5 rows, convert dtype')\n",
+    "print(np.loadtxt('loadtxt.dat', max_rows=5, dtype=np.uint8))"
   ]
  },
  {
--- a/docs/ulab-change-log.md
+++ b/docs/ulab-change-log.md
@ -1,3 +1,9 @@
+Sat, 29 Jan 2022
+
+version 4.4.1
+
+    add dtype keyword to loadtxt
+
 Tue, 15 Jan 2022

 version 4.3.2
--- a/docs/ulab-convert.ipynb
+++ b/docs/ulab-convert.ipynb
@ -17,8 +17,8 @@
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
-     "end_time": "2022-01-28T18:33:51.163571Z",
-     "start_time": "2022-01-28T18:33:51.156339Z"
+     "end_time": "2022-01-29T21:27:54.988801Z",
+     "start_time": "2022-01-29T21:27:54.980856Z"
    }
   },
   "outputs": [
@ -61,7 +61,7 @@
    "author = 'Zoltán Vörös'\n",
    "\n",
    "# The full version, including alpha/beta/rc tags\n",
-    "release = '4.4.0'\n",
+    "release = '4.4.1'\n",
    "\n",
    "\n",
    "# -- General configuration ---------------------------------------------------\n",
@ -215,11 +215,11 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
-     "end_time": "2022-01-28T18:56:20.180430Z",
-     "start_time": "2022-01-28T18:56:19.953451Z"
+     "end_time": "2022-01-29T21:27:59.573556Z",
+     "start_time": "2022-01-29T21:27:57.323819Z"
    }
   },
   "outputs": [],
@ -256,11 +256,11 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
-     "end_time": "2022-01-28T18:56:26.741592Z",
-     "start_time": "2022-01-28T18:56:21.395976Z"
+     "end_time": "2022-01-29T21:28:16.742315Z",
+     "start_time": "2022-01-29T21:28:11.284954Z"
    }
   },
   "outputs": [],
--- a/tests/2d/numpy/loadtxt.py
+++ b/tests/2d/numpy/loadtxt.py
@ -3,22 +3,31 @@ try:
 except:
    import numpy as np

+dtypes = (np.uint8, np.int8, np.uint16, np.int16)
+
 a = np.array(range(8)).reshape((2, 4))
 np.savetxt('loadtxt.dat', a, header='test file data')

 print(np.loadtxt('loadtxt.dat'))
+print()
+
+for dtype in dtypes:
+    print(np.loadtxt('loadtxt.dat', dtype=dtype))
+    print()

 np.savetxt('loadtxt.dat', a, delimiter=',', header='test file data')

 print(np.loadtxt('loadtxt.dat', delimiter=','))
-
+print()

 np.savetxt('loadtxt.dat', a, delimiter=',', comments='!', header='test file data')

 print(np.loadtxt('loadtxt.dat', delimiter=',', comments='!'))
+print()
 print(np.loadtxt('loadtxt.dat', delimiter=',', comments='!', usecols=1))
+print()
 print(np.loadtxt('loadtxt.dat', delimiter=',', comments='!', usecols=(0, 1)))
-
+print()

 a = np.array(range(36)).reshape((9, 4))
 np.savetxt('loadtxt.dat', a, header='9 data rows and a comment')
--- a/tests/2d/numpy/loadtxt.py.exp
+++ b/tests/2d/numpy/loadtxt.py.exp
@ -1,13 +1,30 @@
 array([[0.0, 1.0, 2.0, 3.0],
       [4.0, 5.0, 6.0, 7.0]], dtype=float64)
+
+array([[0, 1, 2, 3],
+       [4, 5, 6, 7]], dtype=uint8)
+
+array([[0, 1, 2, 3],
+       [4, 5, 6, 7]], dtype=int8)
+
+array([[0, 1, 2, 3],
+       [4, 5, 6, 7]], dtype=uint16)
+
+array([[0, 1, 2, 3],
+       [4, 5, 6, 7]], dtype=int16)
+
 array([[0.0, 1.0, 2.0, 3.0],
       [4.0, 5.0, 6.0, 7.0]], dtype=float64)
+
 array([[0.0, 1.0, 2.0, 3.0],
       [4.0, 5.0, 6.0, 7.0]], dtype=float64)
+
 array([[1.0],
       [5.0]], dtype=float64)
+
 array([[0.0, 1.0],
       [4.0, 5.0]], dtype=float64)
+
 array([[0.0, 1.0, 2.0, 3.0],
       [4.0, 5.0, 6.0, 7.0],
       [8.0, 9.0, 10.0, 11.0],