py/objstr: Skip whitespace in bytes.fromhex().
Skip whitespace characters between pairs of hex digits. This makes `bytes.fromhex()` compatible with CPython. Includes a simple test in `tests/basic/builtin_str_hex.py`. Signed-off-by: Glenn Moloney <glenn.moloney@gmail.com>
This commit is contained in:
parent
e9814e987b
commit
6367099f83
3 changed files with 36 additions and 16 deletions
24
py/objstr.c
24
py/objstr.c
|
|
@ -2014,27 +2014,21 @@ mp_obj_t mp_obj_bytes_fromhex(mp_obj_t type_in, mp_obj_t data) {
|
|||
mp_buffer_info_t bufinfo;
|
||||
mp_get_buffer_raise(data, &bufinfo, MP_BUFFER_READ);
|
||||
|
||||
if ((bufinfo.len & 1) != 0) {
|
||||
mp_raise_ValueError(MP_ERROR_TEXT("odd-length string"));
|
||||
}
|
||||
vstr_t vstr;
|
||||
vstr_init_len(&vstr, bufinfo.len / 2);
|
||||
byte *in = bufinfo.buf, *out = (byte *)vstr.buf;
|
||||
byte hex_byte = 0;
|
||||
for (mp_uint_t i = bufinfo.len; i--;) {
|
||||
byte hex_ch = *in++;
|
||||
if (unichar_isxdigit(hex_ch)) {
|
||||
hex_byte += unichar_xdigit_value(hex_ch);
|
||||
} else {
|
||||
mp_raise_ValueError(MP_ERROR_TEXT("non-hex digit found"));
|
||||
byte *in_end = in + bufinfo.len;
|
||||
mp_uint_t ch1, ch2;
|
||||
while (in < in_end) {
|
||||
if (unichar_isspace(ch1 = *in++)) {
|
||||
continue; // Skip whitespace between hex digit pairs
|
||||
}
|
||||
if (i & 1) {
|
||||
hex_byte <<= 4;
|
||||
} else {
|
||||
*out++ = hex_byte;
|
||||
hex_byte = 0;
|
||||
if (in == in_end || !unichar_isxdigit(ch1) || !unichar_isxdigit(ch2 = *in++)) {
|
||||
mp_raise_ValueError(MP_ERROR_TEXT("non-hex digit"));
|
||||
}
|
||||
*out++ = (byte)((unichar_xdigit_value(ch1) << 4) | unichar_xdigit_value(ch2));
|
||||
}
|
||||
vstr.len = out - (byte *)vstr.buf; // Length may be shorter due to whitespace in input
|
||||
return mp_obj_new_str_type_from_vstr(MP_OBJ_TO_PTR(type_in), &vstr);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -20,5 +20,20 @@ for x in (
|
|||
"08090a0b0c0d0e0f",
|
||||
"7f80ff",
|
||||
"313233344142434461626364",
|
||||
"ab\tcd\n ef ",
|
||||
"ab cd ef",
|
||||
"ab cd ef ",
|
||||
" ab cd ef ",
|
||||
# Invalid hex strings:
|
||||
"abcde", # Odd number of hex digits
|
||||
"ab cd e",
|
||||
"a b cd ef", # Space inside a hex pair (only whitespace between pairs is allowed)
|
||||
"ab cd e f ",
|
||||
"abga", # Invalid hex digits
|
||||
"ab_cd",
|
||||
"ab:cd",
|
||||
):
|
||||
print(bytes.fromhex(x))
|
||||
try:
|
||||
print(bytes.fromhex(x))
|
||||
except ValueError as e:
|
||||
print("ValueError:", e)
|
||||
|
|
|
|||
|
|
@ -26,3 +26,14 @@ b'\x00\x01\x02\x03\x04\x05\x06\x07'
|
|||
b'\x08\t\n\x0b\x0c\r\x0e\x0f'
|
||||
b'\x7f\x80\xff'
|
||||
b'1234ABCDabcd'
|
||||
b'\xab\xcd\xef'
|
||||
b'\xab\xcd\xef'
|
||||
b'\xab\xcd\xef'
|
||||
b'\xab\xcd\xef'
|
||||
ValueError: non-hex digit
|
||||
ValueError: non-hex digit
|
||||
ValueError: non-hex digit
|
||||
ValueError: non-hex digit
|
||||
ValueError: non-hex digit
|
||||
ValueError: non-hex digit
|
||||
ValueError: non-hex digit
|
||||
|
|
|
|||
Loading…
Reference in a new issue