Mailing List Archive

[PATCH 2/2] tools/nolibc: implement strtol() and friends
The implementation always works on uintmax_t values.

This is inefficient when only 32bit are needed.
However for all functions this only happens for strtol() on 32bit
platforms.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
---
tools/include/nolibc/stdlib.h | 109 +++++++++++++++++++++++++++
tools/testing/selftests/nolibc/nolibc-test.c | 59 +++++++++++++++
2 files changed, 168 insertions(+)

diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
index 5be9d3c7435a..f66870e25389 100644
--- a/tools/include/nolibc/stdlib.h
+++ b/tools/include/nolibc/stdlib.h
@@ -438,6 +438,115 @@ char *u64toa(uint64_t in)
return itoa_buffer;
}

+static __attribute__((unused))
+uintmax_t __strtox(const char *nptr, char **endptr, int base, intmax_t lower_limit, uintmax_t upper_limit)
+{
+ const char signed_ = lower_limit != 0;
+ unsigned char neg = 0, overflow = 0;
+ uintmax_t val = 0, limit, old_val;
+ char c;
+
+ if (base < 0 || base > 35) {
+ SET_ERRNO(EINVAL);
+ goto out;
+ }
+
+ while (isspace(*nptr))
+ nptr++;
+
+ if (*nptr == '+') {
+ nptr++;
+ } else if (*nptr == '-') {
+ neg = 1;
+ nptr++;
+ }
+
+ if (signed_ && neg)
+ limit = -(uintmax_t)lower_limit;
+ else
+ limit = upper_limit;
+
+ if ((base == 0 || base == 16) &&
+ (strncmp(nptr, "0x", 2) == 0 || strncmp(nptr, "0X", 2) == 0)) {
+ base = 16;
+ nptr += 2;
+ } else if (base == 0 && strncmp(nptr, "0", 1) == 0) {
+ base = 8;
+ nptr += 1;
+ } else if (base == 0) {
+ base = 10;
+ }
+
+ while (*nptr) {
+ c = *nptr;
+
+ if (c >= '0' && c <= '9')
+ c -= '0';
+ else if (c >= 'a' && c <= 'z')
+ c = c - 'a' + 10;
+ else if (c >= 'A' && c <= 'Z')
+ c = c - 'A' + 10;
+ else
+ goto out;
+
+ if (c > base)
+ goto out;
+
+ nptr++;
+ old_val = val;
+ val *= base;
+ val += c;
+
+ if (val > limit || val < old_val)
+ overflow = 1;
+ }
+
+out:
+ if (overflow) {
+ SET_ERRNO(ERANGE);
+ val = limit;
+ }
+ if (endptr)
+ *endptr = (char *)nptr;
+ return (neg ? -1 : 1) * val;
+}
+
+static __attribute__((unused))
+long strtol(const char *nptr, char **endptr, int base)
+{
+ return __strtox(nptr, endptr, base, LONG_MIN, LONG_MAX);
+}
+
+static __attribute__((unused))
+unsigned long strtoul(const char *nptr, char **endptr, int base)
+{
+ return __strtox(nptr, endptr, base, 0, ULONG_MAX);
+}
+
+static __attribute__((unused))
+long long strtoll(const char *nptr, char **endptr, int base)
+{
+ return __strtox(nptr, endptr, base, LLONG_MIN, LLONG_MAX);
+}
+
+static __attribute__((unused))
+unsigned long long strtoull(const char *nptr, char **endptr, int base)
+{
+ return __strtox(nptr, endptr, base, 0, ULLONG_MAX);
+}
+
+static __attribute__((unused))
+intmax_t strtoimax(const char *nptr, char **endptr, int base)
+{
+ return __strtox(nptr, endptr, base, INTMAX_MIN, INTMAX_MAX);
+}
+
+static __attribute__((unused))
+uintmax_t strtoumax(const char *nptr, char **endptr, int base)
+{
+ return __strtox(nptr, endptr, base, 0, UINTMAX_MAX);
+}
+
/* make sure to include all global symbols */
#include "nolibc.h"

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index b9c84d42edbe..6161bd57a0c9 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -621,6 +621,51 @@ int expect_str_buf_eq(size_t expr, const char *buf, size_t val, int llen, const
return 0;
}

+#define EXPECT_STRTOX(cond, func, input, base, expected, chars, expected_errno) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strtox(llen, func, input, base, expected, chars, expected_errno); } while (0)
+
+static __attribute__((unused))
+int expect_strtox(int llen, void *func, const char *input, int base, intmax_t expected, int expected_chars, int expected_errno)
+{
+ char *endptr;
+ int actual_errno, actual_chars;
+ intmax_t r;
+
+ errno = 0;
+ if (func == strtol) {
+ r = strtol(input, &endptr, base);
+ } else if (func == strtoul) {
+ r = strtoul(input, &endptr, base);
+ } else {
+ result(llen, FAIL);
+ return 1;
+ }
+ actual_errno = errno;
+ actual_chars = endptr - input;
+
+ llen += printf(" %lld = %lld", (long long)expected, (long long)r);
+ if (r != expected) {
+ result(llen, FAIL);
+ return 1;
+ }
+ if (expected_chars == -1) {
+ if (*endptr != '\0') {
+ result(llen, FAIL);
+ return 1;
+ }
+ } else if (expected_chars != actual_chars) {
+ result(llen, FAIL);
+ return 1;
+ }
+ if (actual_errno != expected_errno) {
+ result(llen, FAIL);
+ return 1;
+ }
+
+ result(llen, OK);
+ return 0;
+}
+
/* declare tests based on line numbers. There must be exactly one test per line. */
#define CASE_TEST(name) \
case __LINE__: llen += printf("%d %s", test, #name);
@@ -1143,6 +1188,20 @@ int run_stdlib(int min, int max)
CASE_TEST(limit_ptrdiff_min); EXPECT_EQ(1, PTRDIFF_MIN, sizeof(long) == 8 ? (ptrdiff_t) 0x8000000000000000LL : (ptrdiff_t) 0x80000000); break;
CASE_TEST(limit_ptrdiff_max); EXPECT_EQ(1, PTRDIFF_MAX, sizeof(long) == 8 ? (ptrdiff_t) 0x7fffffffffffffffLL : (ptrdiff_t) 0x7fffffff); break;
CASE_TEST(limit_size_max); EXPECT_EQ(1, SIZE_MAX, sizeof(long) == 8 ? (size_t) 0xffffffffffffffffULL : (size_t) 0xffffffffU); break;
+ CASE_TEST(strtol_simple); EXPECT_STRTOX(1, strtol, "35", 10, 35, -1, 0); break;
+ CASE_TEST(strtol_positive); EXPECT_STRTOX(1, strtol, "+35", 10, 35, -1, 0); break;
+ CASE_TEST(strtol_negative); EXPECT_STRTOX(1, strtol, "-35", 10, -35, -1, 0); break;
+ CASE_TEST(strtol_hex_auto); EXPECT_STRTOX(1, strtol, "0xFF", 0, 255, -1, 0); break;
+ CASE_TEST(strtol_octal_auto); EXPECT_STRTOX(1, strtol, "011", 0, 9, -1, 0); break;
+ CASE_TEST(strtol_hex_00); EXPECT_STRTOX(1, strtol, "0x00", 16, 0, -1, 0); break;
+ CASE_TEST(strtol_hex_FF); EXPECT_STRTOX(1, strtol, "FF", 16, 255, -1, 0); break;
+ CASE_TEST(strtol_hex_ff); EXPECT_STRTOX(1, strtol, "ff", 16, 255, -1, 0); break;
+ CASE_TEST(strtol_hex_prefix); EXPECT_STRTOX(1, strtol, "0xFF", 16, 255, -1, 0); break;
+ CASE_TEST(strtol_trailer); EXPECT_STRTOX(1, strtol, "35foo", 10, 35, 2, 0); break;
+ CASE_TEST(strtol_overflow); EXPECT_STRTOX(1, strtol, "0x8000000000000000", 16, LONG_MAX, -1, ERANGE); break;
+ CASE_TEST(strtol_underflow); EXPECT_STRTOX(1, strtol, "-0x8000000000000001", 16, LONG_MIN, -1, ERANGE); break;
+ CASE_TEST(strtoul_negative); EXPECT_STRTOX(1, strtoul, "-0x1", 16, ULONG_MAX, 4, 0); break;
+ CASE_TEST(strtoul_overflow); EXPECT_STRTOX(1, strtoul, "0x10000000000000000", 16, ULONG_MAX, -1, ERANGE); break;

case __LINE__:
return ret; /* must be last */

--
2.44.0
Re: [PATCH 2/2] tools/nolibc: implement strtol() and friends [ In reply to ]
Hi Thomas,

On Thu, Apr 25, 2024 at 06:09:27PM +0200, Thomas Wei?schuh wrote:
> The implementation always works on uintmax_t values.
>
> This is inefficient when only 32bit are needed.
> However for all functions this only happens for strtol() on 32bit
> platforms.

That's indeed very useful! I think there's two small bugs below where
the second one hides the first one:

> +static __attribute__((unused))
> +uintmax_t __strtox(const char *nptr, char **endptr, int base, intmax_t lower_limit, uintmax_t upper_limit)
> +{
> + const char signed_ = lower_limit != 0;
> + unsigned char neg = 0, overflow = 0;
> + uintmax_t val = 0, limit, old_val;
> + char c;
> +
> + if (base < 0 || base > 35) {
^^^^^^^^^
should be 36 otherwise you won't support [0-9a-z].

> + SET_ERRNO(EINVAL);
> + goto out;
> + }
(...)
> + if (c > base)
> + goto out;

This should be "c >= base" otherwise 'z' is accepted in base 35 for
example. I think it could be useful to add one more test covering base
36 to make sure all chars pass ?

> + if (endptr)
> + *endptr = (char *)nptr;
> + return (neg ? -1 : 1) * val;

I just checked to see what the compiler does on this and quite frequently
it emits a multiply while the other approach involving only a negation is
always at least as short:

return neg ? -val : val;

E.g. here's the test code:

long fct1(long neg, long val)
{
return (neg ? -1 : 1) * val;
}

long fct2(long neg, long val)
{
return neg ? -val : val;
}

- on x86_64 with gcc-13.2 -Os:

0000000000000000 <fct1>:
0: f7 df neg %edi
2: 48 19 c0 sbb %rax,%rax
5: 48 83 c8 01 or $0x1,%rax
9: 48 0f af c6 imul %rsi,%rax
d: c3 ret

000000000000000e <fct2>:
e: 48 89 f0 mov %rsi,%rax
11: 85 ff test %edi,%edi
13: 74 03 je 18 <fct2+0xa>
15: 48 f7 d8 neg %rax
18: c3 ret

- on riscv64 with 13.2 -Os:

0000000000000000 <fct1>:
0: c509 beqz a0,a
2: 557d li a0,-1
4: 02b50533 mul a0,a0,a1
8: 8082 ret
a: 4505 li a0,1
c: bfe5 j 4

000000000000000e <fct2>:
e: c119 beqz a0,14
10: 40b005b3 neg a1,a1
14: 852e mv a0,a1
16: 8082 ret

So IMHO it would be better to go the simpler way even if these are just a
few bytes (and possibly ones less mul on some slow archs).

Thanks!
Willy