aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Avinal Kumar <avinal.xlvii@gmail.com> 2024-10-25 15:48:27 +0530
committer Adhemerval Zanella <adhemerval.zanella@linaro.org> 2024-10-25 15:05:06 -0300
commit 04e8698fcca7d1e932bc54f5b60e1bbce2e87601 (patch)
tree 43b2939f0ef8a5abf808806137cd1e30b7217a28
parent ac73067cb7a328bf106ecd041c020fc61be7e087 (diff)
download glibc-04e8698fcca7d1e932bc54f5b60e1bbce2e87601.zip
glibc-04e8698fcca7d1e932bc54f5b60e1bbce2e87601.tar.gz
glibc-04e8698fcca7d1e932bc54f5b60e1bbce2e87601.tar.bz2
stdio-common: Fix scanf parsing for NaN types [BZ #30647]
The scanf family of functions, such as sscanf and fscanf, currently ignores nan() and nan(n-char-sequence). This happens because __vfscanf_internal only checks for 'nan'. This commit adds support for all valid NaN forms, i.e. nan, nan() and nan(n-char-sequence), where n-char-sequence matches [a-zA-Z0-9_]+, thus fixing bug 30647. Any other representation of NaN should result in a conversion error. New tests are also added to verify the correct parsing of the NaN forms for the float, double and long double formats. Signed-off-by: Avinal Kumar <avinal.xlvii@gmail.com> Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
-rw-r--r-- stdio-common/Makefile 1
-rw-r--r-- stdio-common/tst-scanf-nan.c 83
-rw-r--r-- stdio-common/vfscanf-internal.c 46
3 files changed, 129 insertions, 1 deletions
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 88105b3..a166eb7 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -261,6 +261,7 @@ tests := \
tst-scanf-binary-gnu89 \
tst-scanf-bz27650 \
tst-scanf-intn \
+ tst-scanf-nan \
tst-scanf-round \
tst-scanf-to_inpunct \
tst-setvbuf1 \
diff --git a/stdio-common/tst-scanf-nan.c b/stdio-common/tst-scanf-nan.c
new file mode 100644
index 0000000..7450b37
--- /dev/null
+++ b/stdio-common/tst-scanf-nan.c
@@ -0,0 +1,83 @@
+/* Test scanf formats for nan, nan(), nan(n-char-sequence) types.
+ Copyright The GNU Toolchain Authors.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <support/check.h>
+/* Call sscanf (STR, FMT, ...) and TEST_VERIFY that it returns OK. */
+#define CHECK_SCANF_RET(OK, STR, FMT, ...) \
+ do \
+ { \
+ int ret = sscanf (STR, FMT, __VA_ARGS__); \
+ TEST_VERIFY (ret == (OK)); \
+ } \
+ while (0)
+
+/* Exercise the scanf floating-point conversions on NaN input. Valid forms:
+ 1. nan
+ 2. nan()
+ 3. nan([a-zA-Z0-9_]+)
+ Any other nan format is invalid and should produce a conversion error.
+ Each check compares the return value of sscanf with the expected number
+ of successful conversions; scanning stops at the first conversion error. */
+static int
+do_test (void)
+{
+ int a;
+ float b;
+ double c;
+ long double d;
+
+ /* All valid inputs: every directive is expected to convert. */
+ CHECK_SCANF_RET (1, "nan", "%lf", &c);
+ CHECK_SCANF_RET (1, "nan()", "%lf", &c);
+ CHECK_SCANF_RET (1, "nan(12345)", "%lf", &c);
+ CHECK_SCANF_RET (2, "nan12", "%lf%d", &c, &a); /* Digits are left for %d. */
+ CHECK_SCANF_RET (2, "nan nan()", "%f%Lf", &b, &d);
+ CHECK_SCANF_RET (2, "nan nan(12345foo)", "%lf%Lf", &c, &d);
+ CHECK_SCANF_RET (3, "nan nan() 12.234", "%lf%Lf%f", &c, &d, &b);
+ CHECK_SCANF_RET (4, "nannan()nan(foo)1234", "%lf%f%Lf%d", &c, &b, &d, &a);
+
+ /* Partially valid inputs: only the leading nan converts. */
+ CHECK_SCANF_RET (1, "nan( )", "%3lf", &c);
+ CHECK_SCANF_RET (1, "nan nan(", "%lf%f", &c, &b);
+
+ /* Invalid inputs: the NaN conversion itself must fail. */
+
+ /* Dangling parentheses: input ends before the closing ')'. */
+ CHECK_SCANF_RET (0, "nan(", "%lf", &c);
+ CHECK_SCANF_RET (0, "nan(123", "%lf", &c);
+ CHECK_SCANF_RET (0, "nan(12345", "%lf%d", &c, &a);
+
+ /* Field width runs out after '(' and before a closing ')'. */
+ CHECK_SCANF_RET (0, "nan()", "%4Lf", &d);
+ CHECK_SCANF_RET (0, "nan(1", "%5lf", &c);
+
+ /* Space is not a valid n-char-sequence character. */
+ CHECK_SCANF_RET (0, "nan( )", "%lf", &c);
+ CHECK_SCANF_RET (0, "nan( )12.34", "%Lf%f", &d, &b);
+ CHECK_SCANF_RET (0, "nan(12 foo)", "%f", &b);
+
+ /* Period '.' is not valid either; the second directive is never tried. */
+ CHECK_SCANF_RET (0, "nan(12.34) nan(FooBar)", "%lf%Lf", &c, &d);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/stdio-common/vfscanf-internal.c b/stdio-common/vfscanf-internal.c
index 1b82def..5f38f99 100644
--- a/stdio-common/vfscanf-internal.c
+++ b/stdio-common/vfscanf-internal.c
@@ -2028,7 +2028,51 @@ digits_extended_fail:
if (width > 0)
--width;
char_buffer_add (&charbuf, c);
- /* It is "nan". */
+ /* It is at least "nan". Now we check for nan() and
+ nan(n-char-sequence). */
+ if (width != 0 && inchar () != EOF)
+ {
+ if (c == L_('('))
+ {
+ if (width > 0)
+ --width;
+ char_buffer_add (&charbuf, c);
+ /* A '(' was observed, check for a closing ')', there
+ may or may not be a n-char-sequence in between. We
+ have to check the longest prefix until there is a
+ conversion error or closing parenthesis. */
+ do
+ {
+ if (__glibc_unlikely (width == 0
+ || inchar () == EOF))
+ {
+ /* Conversion error because we ran out of
+ characters. */
+ conv_error ();
+ break;
+ }
+ if (!((c >= L_('0') && c <= L_('9'))
+ || (c >= L_('A') && c <= L_('Z'))
+ || (c >= L_('a') && c <= L_('z'))
+ || c == L_('_') || c == L_(')')))
+ {
+ /* Invalid character was observed. Only valid
+ characters are [a-zA-Z0-9_] and ')'. */
+ conv_error ();
+ break;
+ }
+ if (width > 0)
+ --width;
+ char_buffer_add (&charbuf, c);
+ }
+ while (c != L_(')'));
+ /* The loop only exits successfully when ')' is the
+ last character. */
+ }
+ else
+ /* It is only 'nan'. */
+ ungetc (c, s);
+ }
goto scan_float;
}
else if (TOLOWER (c) == L_('i'))