add dtype keyword to loadtxt

This commit is contained in:
Zoltán Vörös 2022-01-29 22:30:11 +01:00
parent 9dc9b776d5
commit 19282b47d1
8 changed files with 104 additions and 35 deletions

View file

@ -8,6 +8,7 @@
* Copyright (c) 2022 Zoltán Vörös
*/
#include <math.h>
#include <string.h>
#include "py/builtin.h"
@ -243,6 +244,7 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
{ MP_QSTR_comments, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = mp_const_none } },
{ MP_QSTR_max_rows, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = -1 } },
{ MP_QSTR_usecols, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = mp_const_none } },
{ MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = NDARRAY_FLOAT } },
};
mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
@ -304,6 +306,8 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
}
}
uint8_t dtype = args[5].u_int;
// count the columns and rows
// we count only the rows and the items, and assume that
// the number of columns can be obtained by a simple division,
@ -363,7 +367,7 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
#if ULAB_MAX_DIMS == 1
shape[0] = rows;
ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(1, shape, NDARRAY_FLOAT);
ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(1, shape, dtype);
#else
if(args[4].u_obj == mp_const_none) {
shape[ULAB_MAX_DIMS - 1] = columns;
@ -371,11 +375,9 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
shape[ULAB_MAX_DIMS - 1] = used_columns;
}
shape[ULAB_MAX_DIMS - 2] = rows;
ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(2, shape, NDARRAY_FLOAT);
ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(2, shape, dtype);
#endif
mp_float_t *array = (mp_float_t *)ndarray->array;
struct mp_stream_seek_t seek_s;
seek_s.offset = 0;
seek_s.whence = MP_SEEK_SET;
@ -388,6 +390,7 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
rows = 0;
columns = 0;
size_t idx = 0;
do {
read = stream_p->read(stream, buffer, ULAB_IO_BUFFER_SIZE - 1, &error);
buffer[read] = '\0';
@ -421,12 +424,20 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
#else
if(args[4].u_obj == mp_const_none) {
mp_obj_t value = mp_parse_num_decimal(clipboard, len, false, false, NULL);
*array++ = mp_obj_get_float(value);
if(dtype != NDARRAY_FLOAT) {
mp_float_t _value = mp_obj_get_float(value);
value = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(round)(_value));
}
ndarray_set_value(dtype, ndarray->array, idx++, value);
} else {
for(uint8_t c = 0; c < used_columns; c++) {
if(columns == cols[c]) {
mp_obj_t value = mp_parse_num_decimal(clipboard, len, false, false, NULL);
*array++ = mp_obj_get_float(value);
if(dtype != NDARRAY_FLOAT) {
mp_float_t _value = mp_obj_get_float(value);
value = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(round)(_value));
}
ndarray_set_value(dtype, ndarray->array, idx++, value);
break;
}
}

View file

@ -27,7 +27,7 @@ copyright = '2019-2022, Zoltán Vörös and contributors'
author = 'Zoltán Vörös'
# The full version, including alpha/beta/rc tags
release = '4.4.0'
release = '4.4.1'
# -- General configuration ---------------------------------------------------

View file

@ -1027,10 +1027,12 @@ https://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html
The function reads data from a text file, and returns the generated
array. It takes a file name as the single positional argument, and the
``comments`` (with a default value of ``#``), the ``delimiter`` (with a
default value of ``,``), ``usecols`` (with a default of all columns),
and ``max_rows`` (with a default of all rows) keyword arguments. The
array returned is always of type ``float``.
``dtype`` (with a default value of ``float``), the ``comments`` (with a
default value of ``#``), the ``delimiter`` (with a default value of
``,``), ``usecols`` (with a default of all columns), and the
``max_rows`` (with a default of all rows) keyword arguments. If
``dtype`` is supplied and is not ``float``, the data entries will be
converted to the appropriate integer type by rounding the values.
.. code::
@ -1040,8 +1042,12 @@ array returned is always of type ``float``.
print('read all data')
print(np.loadtxt('loadtxt.dat'))
print('\nread maximum 5 rows (first row is a comment line)')
print(np.loadtxt('loadtxt.dat', max_rows=5))
print('\nread maximum 5 rows, convert dtype')
print(np.loadtxt('loadtxt.dat', max_rows=5, dtype=np.uint8))
.. parsed-literal::
@ -1052,7 +1058,9 @@ array returned is always of type ``float``.
[12.0, 13.0, 14.0, 15.0],
[16.0, 17.0, 18.0, 19.0],
[20.0, 21.0, 22.0, 23.0],
[24.0, 25.0, 26.0, 27.0]], dtype=float64)
[24.0, 25.0, 26.0, 27.0],
[28.00000000000001, 29.0, 30.0, 31.0],
[32.0, 33.0, 34.00000000000001, 35.0]], dtype=float64)
read maximum 5 rows (first row is a comment line)
array([[0.0, 1.0, 2.0, 3.0],
@ -1060,6 +1068,12 @@ array returned is always of type ``float``.
[8.0, 9.0, 10.0, 11.0],
[12.0, 13.0, 14.0, 15.0]], dtype=float64)
read maximum 5 rows, convert dtype
array([[0, 1, 2, 3],
[4, 5, 6, 7],
[8, 9, 10, 11],
[12, 13, 14, 15]], dtype=uint8)

View file

@ -31,11 +31,11 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2022-01-28T18:34:31.017702Z",
"start_time": "2022-01-28T18:34:31.010354Z"
"end_time": "2022-01-29T21:24:54.931042Z",
"start_time": "2022-01-29T21:24:54.927243Z"
}
},
"outputs": [],
@ -49,11 +49,11 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2022-01-28T18:34:31.565147Z",
"start_time": "2022-01-28T18:34:31.550395Z"
"end_time": "2022-01-29T21:24:55.649634Z",
"start_time": "2022-01-29T21:24:55.626921Z"
}
},
"outputs": [],
@ -1474,16 +1474,16 @@
"\n",
"`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html\n",
"\n",
"The function reads data from a text file, and returns the generated array. It takes a file name as the single positional argument, and the `comments` (with a default value of `#`), the `delimiter` (with a default value of `,`), `usecols` (with a default of all columns), and `max_rows` (with a default of all rows) keyword arguments. The array returned is always of type `float`."
"The function reads data from a text file, and returns the generated array. It takes a file name as the single positional argument, and the `dtype` (with a default value of `float`), the `comments` (with a default value of `#`), the `delimiter` (with a default value of `,`), `usecols` (with a default of all columns), and the `max_rows` (with a default of all rows) keyword arguments. If `dtype` is supplied and is not `float`, the data entries will be converted to the appropriate integer type by rounding the values."
]
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2022-01-28T18:47:52.346814Z",
"start_time": "2022-01-28T18:47:52.291552Z"
"end_time": "2022-01-29T21:26:36.258135Z",
"start_time": "2022-01-29T21:26:36.236256Z"
}
},
"outputs": [
@ -1498,7 +1498,9 @@
" [12.0, 13.0, 14.0, 15.0],\n",
" [16.0, 17.0, 18.0, 19.0],\n",
" [20.0, 21.0, 22.0, 23.0],\n",
" [24.0, 25.0, 26.0, 27.0]], dtype=float64)\n",
" [24.0, 25.0, 26.0, 27.0],\n",
" [28.00000000000001, 29.0, 30.0, 31.0],\n",
" [32.0, 33.0, 34.00000000000001, 35.0]], dtype=float64)\n",
"\n",
"read maximum 5 rows (first row is a comment line)\n",
"array([[0.0, 1.0, 2.0, 3.0],\n",
@ -1506,6 +1508,12 @@
" [8.0, 9.0, 10.0, 11.0],\n",
" [12.0, 13.0, 14.0, 15.0]], dtype=float64)\n",
"\n",
"read maximum 5 rows, convert dtype\n",
"array([[0, 1, 2, 3],\n",
" [4, 5, 6, 7],\n",
" [8, 9, 10, 11],\n",
" [12, 13, 14, 15]], dtype=uint8)\n",
"\n",
"\n"
]
}
@ -1517,8 +1525,12 @@
"\n",
"print('read all data')\n",
"print(np.loadtxt('loadtxt.dat'))\n",
"\n",
"print('\\nread maximum 5 rows (first row is a comment line)')\n",
"print(np.loadtxt('loadtxt.dat', max_rows=5))"
"print(np.loadtxt('loadtxt.dat', max_rows=5))\n",
"\n",
"print('\\nread maximum 5 rows, convert dtype')\n",
"print(np.loadtxt('loadtxt.dat', max_rows=5, dtype=np.uint8))"
]
},
{

View file

@ -1,3 +1,9 @@
Sat, 29 Jan 2022
version 4.4.1
add dtype keyword to loadtxt
Tue, 15 Jan 2022
version 4.3.2

View file

@ -17,8 +17,8 @@
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2022-01-28T18:33:51.163571Z",
"start_time": "2022-01-28T18:33:51.156339Z"
"end_time": "2022-01-29T21:27:54.988801Z",
"start_time": "2022-01-29T21:27:54.980856Z"
}
},
"outputs": [
@ -61,7 +61,7 @@
"author = 'Zoltán Vörös'\n",
"\n",
"# The full version, including alpha/beta/rc tags\n",
"release = '4.4.0'\n",
"release = '4.4.1'\n",
"\n",
"\n",
"# -- General configuration ---------------------------------------------------\n",
@ -215,11 +215,11 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2022-01-28T18:56:20.180430Z",
"start_time": "2022-01-28T18:56:19.953451Z"
"end_time": "2022-01-29T21:27:59.573556Z",
"start_time": "2022-01-29T21:27:57.323819Z"
}
},
"outputs": [],
@ -256,11 +256,11 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2022-01-28T18:56:26.741592Z",
"start_time": "2022-01-28T18:56:21.395976Z"
"end_time": "2022-01-29T21:28:16.742315Z",
"start_time": "2022-01-29T21:28:11.284954Z"
}
},
"outputs": [],

View file

@ -3,22 +3,31 @@ try:
except:
import numpy as np
dtypes = (np.uint8, np.int8, np.uint16, np.int16)
a = np.array(range(8)).reshape((2, 4))
np.savetxt('loadtxt.dat', a, header='test file data')
print(np.loadtxt('loadtxt.dat'))
print()
for dtype in dtypes:
print(np.loadtxt('loadtxt.dat', dtype=dtype))
print()
np.savetxt('loadtxt.dat', a, delimiter=',', header='test file data')
print(np.loadtxt('loadtxt.dat', delimiter=','))
print()
np.savetxt('loadtxt.dat', a, delimiter=',', comments='!', header='test file data')
print(np.loadtxt('loadtxt.dat', delimiter=',', comments='!'))
print()
print(np.loadtxt('loadtxt.dat', delimiter=',', comments='!', usecols=1))
print()
print(np.loadtxt('loadtxt.dat', delimiter=',', comments='!', usecols=(0, 1)))
print()
a = np.array(range(36)).reshape((9, 4))
np.savetxt('loadtxt.dat', a, header='9 data rows and a comment')

View file

@ -1,13 +1,30 @@
array([[0.0, 1.0, 2.0, 3.0],
[4.0, 5.0, 6.0, 7.0]], dtype=float64)
array([[0, 1, 2, 3],
[4, 5, 6, 7]], dtype=uint8)
array([[0, 1, 2, 3],
[4, 5, 6, 7]], dtype=int8)
array([[0, 1, 2, 3],
[4, 5, 6, 7]], dtype=uint16)
array([[0, 1, 2, 3],
[4, 5, 6, 7]], dtype=int16)
array([[0.0, 1.0, 2.0, 3.0],
[4.0, 5.0, 6.0, 7.0]], dtype=float64)
array([[0.0, 1.0, 2.0, 3.0],
[4.0, 5.0, 6.0, 7.0]], dtype=float64)
array([[1.0],
[5.0]], dtype=float64)
array([[0.0, 1.0],
[4.0, 5.0]], dtype=float64)
array([[0.0, 1.0, 2.0, 3.0],
[4.0, 5.0, 6.0, 7.0],
[8.0, 9.0, 10.0, 11.0],