Add 'string is' to Jim

Also, double parsing now allows trailing white space Signed-off-by: Steve Bennett <steveb@workware.net.au>
author: Steve Bennett <steveb@workware.net.au> 2010-08-31 07:18:26 +1000
committer: Steve Bennett <steveb@workware.net.au> 2010-10-15 11:02:51 +1000
commit: 0e12f3e070c6ff3ba4542a6ea99da0380964e844 (patch)
tree: 0167d98dd703ab44ea45364ac2b2cce87cf10fea
parent: 6f5c9bf0dd12cd5a166594a554ebe9380d81f37e (diff)
download: jimtcl-0e12f3e070c6ff3ba4542a6ea99da0380964e844.zip
jimtcl-0e12f3e070c6ff3ba4542a6ea99da0380964e844.tar.gz
jimtcl-0e12f3e070c6ff3ba4542a6ea99da0380964e844.tar.bz2
3 files changed, 290 insertions, 12 deletions
diff --git a/jim.c b/jim.c
index e2498cb..6d9d8bc 100644
--- a/jim.c
+++ b/jim.c
@@ -92,6 +92,7 @@ static int JimCallProcedure(Jim_Interp *interp, Jim_Cmd *cmd, const char *filena
     int argc, Jim_Obj *const *argv);
 static int JimEvalObjVector(Jim_Interp *interp, int objc, Jim_Obj *const *objv,
     const char *filename, int linenr);
+static int JimGetWideNoErr(Jim_Interp *interp, Jim_Obj *objPtr, jim_wide * widePtr);
 
 static const Jim_HashTableType JimVariablesHashTableType;
 
@@ -312,14 +313,19 @@ int Jim_WideToString(char *buf, jim_wide wideValue)
     return sprintf(buf, fmt, wideValue);
 }
 
-int Jim_StringToWide(const char *str, jim_wide * widePtr, int base)
+/**
+ * After an strtol()/strtod()-like conversion,
+ * check whether something was converted and that
+ * the only thing left is white space.
+ * 
+ * Returns JIM_OK or JIM_ERR.
+ */
+static int JimCheckConversion(const char *str, const char *endptr)
 {
-    char *endptr;
-
-    *widePtr = strtoull(str, &endptr, base);
-
-    if ((str[0] == '\0') || (str == endptr))
+    if (str[0] == '\0' || str == endptr) {
         return JIM_ERR;
+    }
+
     if (endptr[0] != '\0') {
         while (*endptr) {
             if (!isspace(*endptr)) {
@@ -331,6 +337,15 @@ int Jim_StringToWide(const char *str, jim_wide * widePtr, int base)
     return JIM_OK;
 }
 
+int Jim_StringToWide(const char *str, jim_wide * widePtr, int base)
+{
+    char *endptr;
+
+    *widePtr = strtoull(str, &endptr, base);
+
+    return JimCheckConversion(str, endptr);
+}
+
 int Jim_DoubleToString(char *buf, double doubleValue)
 {
     int len;
@@ -361,11 +376,12 @@ int Jim_StringToDouble(const char *str, double *doublePtr)
 {
     char *endptr;
 
+    /* Callers can check for underflow via ERANGE */
+    errno = 0;
+
     *doublePtr = strtod(str, &endptr);
-    if (str[0] == '\0' || endptr[0] != '\0' || (str == endptr)) {
-        return JIM_ERR;
-    }
-    return JIM_OK;
+
+    return JimCheckConversion(str, endptr);
 }
 
 static jim_wide JimPowWide(jim_wide b, jim_wide e)
@@ -2255,6 +2271,75 @@ static Jim_Obj *JimStringTrimRight(Jim_Interp *interp, Jim_Obj *strObjPtr, Jim_O
     return Jim_NewStringObjNoAlloc(interp, buf, -1);
 }
 
+
+static int JimStringIs(Jim_Interp *interp, Jim_Obj *strObjPtr, Jim_Obj *strClass, int strict)
+{
+    static const char *strclassnames[] = {
+        "integer", "alpha", "alnum", "ascii", "digit",
+        "double", "lower", "upper", "space", "xdigit",
+        "control", "print", "graph", "punct",
+        NULL
+    };
+    enum {
+        STR_IS_INTEGER, STR_IS_ALPHA, STR_IS_ALNUM, STR_IS_ASCII, STR_IS_DIGIT,
+        STR_IS_DOUBLE, STR_IS_LOWER, STR_IS_UPPER, STR_IS_SPACE, STR_IS_XDIGIT,
+        STR_IS_CONTROL, STR_IS_PRINT, STR_IS_GRAPH, STR_IS_PUNCT
+    };
+    int strclass;
+    int len;
+    int i;
+    const char *str;
+    int (*isclassfunc)(int c) = NULL;
+
+    if (Jim_GetEnum(interp, strClass, strclassnames, &strclass, "class", JIM_ERRMSG | JIM_ENUM_ABBREV) != JIM_OK) {
+        return JIM_ERR;
+    }
+
+    str = Jim_GetString(strObjPtr, &len);
+    if (len == 0) {
+        Jim_SetResultInt(interp, !strict);
+        return JIM_OK;
+    }
+
+    switch (strclass) {
+        case STR_IS_INTEGER:
+            {
+                jim_wide w;
+                Jim_SetResultInt(interp, JimGetWideNoErr(interp, strObjPtr, &w) == JIM_OK);
+                return JIM_OK;
+            }
+
+        case STR_IS_DOUBLE:
+            {
+                double d;
+                Jim_SetResultInt(interp, Jim_GetDouble(interp, strObjPtr, &d) == JIM_OK && errno != ERANGE);
+                return JIM_OK;
+            }
+
+        case STR_IS_ALPHA: isclassfunc = isalpha; break;
+        case STR_IS_ALNUM: isclassfunc = isalnum; break;
+        case STR_IS_ASCII: isclassfunc = isascii; break;
+        case STR_IS_DIGIT: isclassfunc = isdigit; break;
+        case STR_IS_LOWER: isclassfunc = islower; break;
+        case STR_IS_UPPER: isclassfunc = isupper; break;
+        case STR_IS_SPACE: isclassfunc = isspace; break;
+        case STR_IS_XDIGIT: isclassfunc = isxdigit; break;
+        case STR_IS_CONTROL: isclassfunc = iscntrl; break;
+        case STR_IS_PRINT: isclassfunc = isprint; break;
+        case STR_IS_GRAPH: isclassfunc = isgraph; break;
+        case STR_IS_PUNCT: isclassfunc = ispunct; break;
+    }
+
+    for (i = 0; i < len; i++) {
+        if (!isclassfunc(str[i])) {
+            Jim_SetResultInt(interp, 0);
+            return JIM_OK;
+        }
+    }
+    Jim_SetResultInt(interp, 1);
+    return JIM_OK;
+}
+
 /* This is the core of the [format] command.
  * TODO: Lots of things work - via a hack
  *       However, no format item can be >= JIM_MAX_FMT
@@ -12118,13 +12203,13 @@ static int Jim_StringCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *a
     int opt_case = 1;
     int option;
     static const char *options[] = {
-        "length", "compare", "match", "equal", "range", "map",
+        "length", "compare", "match", "equal", "is", "range", "map",
         "repeat", "reverse", "index", "first", "last",
         "trim", "trimleft", "trimright", "tolower", "toupper", NULL
     };
     enum
     {
-        OPT_LENGTH, OPT_COMPARE, OPT_MATCH, OPT_EQUAL, OPT_RANGE, OPT_MAP,
+        OPT_LENGTH, OPT_COMPARE, OPT_MATCH, OPT_EQUAL, OPT_IS, OPT_RANGE, OPT_MAP,
         OPT_REPEAT, OPT_REVERSE, OPT_INDEX, OPT_FIRST, OPT_LAST,
         OPT_TRIM, OPT_TRIMLEFT, OPT_TRIMRIGHT, OPT_TOLOWER, OPT_TOUPPER
     };
@@ -12349,6 +12434,13 @@ static int Jim_StringCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *a
                 Jim_SetResult(interp, JimStringToUpper(interp, argv[2]));
             }
             return JIM_OK;
+
+        case OPT_IS:
+            if (argc == 4 || (argc == 5 && Jim_CompareStringImmediate(interp, argv[3], "-strict"))) {
+                return JimStringIs(interp, argv[argc - 1], argv[2], argc == 5);
+            }
+            Jim_WrongNumArgs(interp, 2, argv, "class ?-strict? str");
+            return JIM_ERR;
     }
     return JIM_OK;
 }
diff --git a/jim_tcl.txt b/jim_tcl.txt
index d2640f8..4299d37 100644
--- a/jim_tcl.txt
+++ b/jim_tcl.txt
@@ -75,6 +75,7 @@ Since v0.62:
 12. Enhance 'try ... on ... finally' to be more Tcl 8.6 compatible
 13. It is now possible to 'return' from within 'try'
 14. IPv6 support is now included
+15. Add 'string is'
 
 Since v0.61:
 
@@ -3391,6 +3392,28 @@ The legal options (which may be abbreviated) are:
  ::
     See STRING AND LIST INDEX SPECIFICATIONS for all allowed forms for *charIndex*.
 
++*string is* 'class' ?*-strict*? 'string'+::
+ Returns 1 if *string* is a valid member of the specified character
+ class, otherwise returns 0. If '-strict' is specified, then an
+ empty string returns 0, otherwise an empty string will return 1
+ on any class. The following character classes are recognized
+ (the class name can be abbreviated):
+  +alnum+;;  Any alphabet or digit character.
+  +alpha+;;  Any alphabet character.
+  +ascii+;;  Any character with a value less than 128 (those that are in the 7-bit ascii range).
+  +control+;; Any control character.
+  +digit+;;  Any digit character.
+  +double+;; Any of the valid forms for a double in Tcl, with optional surrounding whitespace.
+             In case of under/overflow in the value, 0 is returned.
+  +graph+;;  Any printing character, except space.
+  +integer+;; Any of the valid string formats for an integer value in Tcl, with optional surrounding whitespace.
+  +lower+;;  Any lower case alphabet character.
+  +print+;;  Any printing character, including space.
+  +punct+;;  Any punctuation character.
+  +space+;;  Any space character.
+  +upper+;;  Any upper case alphabet character.
+  +xdigit+;; Any hexadecimal digit character ([0-9A-Fa-f]).
+
 +*string last* 'string1 string2 ?lastIndex?'+::
     Search *string2* for a sequence of characters that exactly match
     the characters in *string1*.  If found, return the index of the
diff --git a/tests/string.test b/tests/string.test
index 2d02ba3..3336d7d 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -214,6 +214,169 @@ test string-5.9 {string index} {
 #    list [catch {string index "abc" end-00289} msg]
 #} {1}
 
+test string-6.1 {string is, too few args} {
+    list [catch {string is} msg] $msg
+} {1 {wrong # args: should be "string is class ?-strict? str"}}
+test string-6.2 {string is, too few args} {
+    list [catch {string is alpha} msg] $msg
+} {1 {wrong # args: should be "string is class ?-strict? str"}}
+test string-6.3 {string is, bad args} {
+    list [catch {string is alpha -failin str} msg] $msg
+} {1 {wrong # args: should be "string is class ?-strict? str"}}
+test string-6.4 {string is, too many args} {
+    list [catch {string is alpha -failin var -strict str more} msg] $msg
+} {1 {wrong # args: should be "string is class ?-strict? str"}}
+test string-6.5 {string is, class check} {
+    list [catch {string is bogus str} msg] $msg
+} {1 {bad class "bogus": must be alnum, alpha, ascii, control, digit, double, graph, integer, lower, print, punct, space, upper, or xdigit}}
+test string-6.6 {string is, ambiguous class} {
+    list [catch {string is al str} msg] $msg
+} {1 {ambiguous class "al": must be alnum, alpha, ascii, control, digit, double, graph, integer, lower, print, punct, space, upper, or xdigit}}
+test string-6.10 {string is, ok on empty} {
+    string is alpha {}
+} 1
+test string-6.11 {string is, -strict check against empty} {
+    string is alpha -strict {}
+} 0
+test string-6.12 {string is alnum, true} {
+    string is alnum abc123
+} 1
+test string-6.15 {string is alpha, true} {
+    string is alpha abc
+} 1
+test string-6.24 {string is digit, true} {
+    string is digit 0123456789
+} 1
+test string-6.25 {string is digit, false} {
+    list [string is digit 0123Ü567]
+} {0}
+test string-6.26 {string is digit, false} {
+    list [string is digit +123567]
+} {0}
+test string-6.27 {string is double, true} {
+    string is double 1
+} 1
+test string-6.28 {string is double, true} {
+    string is double [expr double(1)]
+} 1
+test string-6.29 {string is double, true} {
+    string is double 1.0
+} 1
+test string-6.30 {string is double, true} {
+    string is double [string compare a a]
+} 1
+test string-6.31 {string is double, true} {
+    string is double "   +1.0e-1  "
+} 1
+test string-6.32 {string is double, true} {
+    string is double "\n1.0\v"
+} 1
+test string-6.33 {string is double, false} {
+    list [string is double  1abc]
+} {0}
+test string-6.34 {string is double, false} {
+    list [string is double  abc]
+} {0}
+test string-6.35 {string is double, false} {
+    list [string is double  "   1.0e4e4  "]
+} {0}
+test string-6.36 {string is double, false} {
+    list [string is double  "\n"]
+} {0}
+test string-6.38 {string is double, false on underflow} {
+    list [string is double  123e-9999]
+} {0}
+test string-6.39 {string is double, false} {
+    # This test is non-portable because IRIX thinks 
+    # that .e1 is a valid double - this is really a bug
+    # on IRIX as .e1 should NOT be a valid double
+
+    list [string is double  .e1]
+} {0}
+test string-6.48 {string is integer, true} {
+    string is integer +1234567890
+} 1
+test string-6.49 {string is integer, true on type} {
+    string is integer [expr int(50.0)]
+} 1
+test string-6.50 {string is integer, true} {
+    string is integer [list -10]
+} 1
+test string-6.51 {string is integer, true as hex} {
+    string is integer 0xabcdef
+} 1
+test string-6.52 {string is integer, true as octal} {
+    string is integer 012345
+} 1
+test string-6.53 {string is integer, true with whitespace} {
+    string is integer "  \n1234\v"
+} 1
+test string-6.54 {string is integer, false} {
+    list [string is integer  123abc]
+} 0
+test string-6.56 {string is integer, false} {
+    list [string is integer  [expr double(1)]]
+} 0
+test string-6.57 {string is integer, false} {
+    list [string is integer  "    "]
+} 0
+test string-6.58 {string is integer, false on bad octal} {
+    list [string is integer  036963]
+} 0
+test string-6.59 {string is integer, false on bad hex} {
+    list [string is integer  0X345XYZ]
+} 0
+test string-6.60 {string is lower, true} {
+    string is lower abc
+} 1
+test string-6.62 {string is lower, false} {
+    list [string is lower  aBc]
+} 0
+test string-6.63 {string is lower, false} {
+    list [string is lower  abc1]
+} 0
+test string-6.64 {string is lower, unicode false} {
+    list [string is lower  abÜUE]
+} 0
+test string-6.65 {string is space, true} {
+    string is space " \t\n\v\f"
+} 1
+test string-6.66 {string is space, false} {
+    list [string is space  " \t\n\v1\f"]
+} 0
+test string-6.75 {string is upper, true} {
+    string is upper ABC
+} 1
+test string-6.77 {string is upper, false} {
+    list [string is upper  AbC]
+} 0
+test string-6.78 {string is upper, false} {
+    list [string is upper  AB2C]
+} 0
+test string-6.84 {string is control} {
+    ## Control chars are in the ranges
+    ## 00..1F && 7F..9F
+    list [string is control  \x00\x01\x10\x1F\x7F\x80\x9F\x60]
+} 0
+test string-6.85 {string is control} {
+    string is control \u0100
+} 0
+test string-6.86 {string is graph} {
+    ## graph is any print char, except space
+    list [string is gra  "0123abc!@#\$ "]
+} 0
+test string-6.87 {string is print} {
+    ## basically any printable char
+    list [string is print  "0123abc!@#\$ \010"]
+} 0
+test string-6.88 {string is punct} {
+    ## any graph char that isn't alnum
+    list [string is punct  "_!@#\000beq0"]
+} 0
+test string-6.89 {string is xdigit} {
+    list [string is xdigit  0123456789\u0061bcdefABCDEFg]
+} 0
+
 test string-7.1 {string last, too few args} {
     list [catch {string last a} msg]
 } {1}
author	Steve Bennett <steveb@workware.net.au>	2010-08-31 07:18:26 +1000
committer	Steve Bennett <steveb@workware.net.au>	2010-10-15 11:02:51 +1000
commit	0e12f3e070c6ff3ba4542a6ea99da0380964e844 (patch)
tree	0167d98dd703ab44ea45364ac2b2cce87cf10fea
parent	6f5c9bf0dd12cd5a166594a554ebe9380d81f37e (diff)
download	jimtcl-0e12f3e070c6ff3ba4542a6ea99da0380964e844.zip jimtcl-0e12f3e070c6ff3ba4542a6ea99da0380964e844.tar.gz jimtcl-0e12f3e070c6ff3ba4542a6ea99da0380964e844.tar.bz2