Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: (provisional) underscores in constants in numeric evaluation



Another of those occasions when "wouldn't it be helpful if..." toppled
over the edge into frustration.

Some scripting languages allow dummy "_"s in numeric constants so you
can count the digits more easily.  This is very convenient if you're
doing arithmetic with large integers.

It seems to me (watch this assumption closely) that if we restrict this
to handling constants in arithmetic evaluation, where we already know we
require a numeric constant and we already know the syntax needs to be
that of an arithmetic expression, all substitutions having been done
by this point, we can get away with doing this without a new option.  It
would not be safe to modify zstrtol() to do this more widely.

A quick poll of Perl and Ruby suggests they both allow underscores in
floating-point constants (those with a decimal point or exponent).
Handling those is the messiest (and least efficient) part of this
patch --- but it should be safe.  I don't fancy replacing strtod();
floating point numbers need careful handling.

I won't be committing this any time particularly soon.

Index: Src/math.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/math.c,v
retrieving revision 1.41
diff -p -u -r1.41 math.c
--- Src/math.c	19 Jun 2011 16:26:11 -0000	1.41
+++ Src/math.c	30 Aug 2012 13:55:36 -0000
@@ -452,7 +452,7 @@ lexconstant(void)
 	nptr++;
 	if (*nptr == 'x' || *nptr == 'X') {
 	    /* Let zstrtol parse number with base */
-	    yyval.u.l = zstrtol(ptr, &ptr, 0);
+	    yyval.u.l = zstrtol_underscore(ptr, &ptr, 0, 1);
 	    /* Should we set lastbase here? */
 	    lastbase = 16;
 	    return NUM;
@@ -466,13 +466,13 @@ lexconstant(void)
 	     * it can't be a base indication (always decimal)
 	     * or a floating point number.
 	     */
-	    for (ptr2 = nptr; idigit(*ptr2); ptr2++)
+	    for (ptr2 = nptr; idigit(*ptr2) || *ptr2 == '_'; ptr2++)
 		;
 
 	    if (ptr2 > nptr && *ptr2 != '.' && *ptr2 != 'e' &&
 		*ptr2 != 'E' && *ptr2 != '#')
 	    {
-		yyval.u.l = zstrtol(ptr, &ptr, 0);
+		yyval.u.l = zstrtol_underscore(ptr, &ptr, 0, 1);
 		lastbase = 8;
 		return NUM;
 	    }
@@ -481,17 +481,43 @@ lexconstant(void)
     }
     else
     {
-	while (idigit(*nptr))
+	while (idigit(*nptr) || *nptr == '_')
 	    nptr++;
     }
 
     if (*nptr == '.' || *nptr == 'e' || *nptr == 'E') {
+	char *ptr2;
 	/* it's a float */
 	yyval.type = MN_FLOAT;
 #ifdef USE_LOCALE
 	prev_locale = dupstring(setlocale(LC_NUMERIC, NULL));
 	setlocale(LC_NUMERIC, "POSIX");
 #endif
+	if (*nptr == '.') {
+	    nptr++;
+	    while (idigit(*nptr) || *nptr == '_')
+		nptr++;
+	}
+	if (*nptr == 'e' || *nptr == 'E') {
+	    nptr++;
+	    if (*nptr == '+' || *nptr == '-')
+		nptr++;
+	    while (idigit(*nptr) || *nptr == '_')
+		nptr++;
+	}
+	for (ptr2 = ptr; ptr2 < nptr; ptr2++) {
+	    if (*ptr2 == '_') {
+		int len = nptr - ptr;
+		ptr = strdup(ptr);
+		for (ptr2 = ptr; len; len--) {
+		    if (*ptr2 == '_')
+			chuck(ptr2);
+		    else
+			ptr2++;
+		}
+		break;
+	    }
+	}
 	yyval.u.d = strtod(ptr, &nptr);
 #ifdef USE_LOCALE
 	if (prev_locale) setlocale(LC_NUMERIC, prev_locale);
@@ -503,11 +529,12 @@ lexconstant(void)
 	ptr = nptr;
     } else {
 	/* it's an integer */
-	yyval.u.l = zstrtol(ptr, &ptr, 10);
+	yyval.u.l = zstrtol_underscore(ptr, &ptr, 10, 1);
 
 	if (*ptr == '#') {
 	    ptr++;
-	    yyval.u.l = zstrtol(ptr, &ptr, lastbase = yyval.u.l);
+	    lastbase = yyval.u.l;
+	    yyval.u.l = zstrtol_underscore(ptr, &ptr, lastbase, 1);
 	}
     }
     return NUM;
Index: Src/utils.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/utils.c,v
retrieving revision 1.270
diff -p -u -r1.270 utils.c
--- Src/utils.c	27 Jun 2012 07:10:29 -0000	1.270
+++ Src/utils.c	30 Aug 2012 13:55:36 -0000
@@ -2030,13 +2030,20 @@ skipparens(char inpar, char outpar, char
    return level;
 }
 
+/**/
+mod_export zlong
+zstrtol(const char *s, char **t, int base)
+{
+    return zstrtol_underscore(s, t, base, 0);
+}
+
 /* Convert string to zlong (see zsh.h).  This function (without the z) *
  * is contained in the ANSI standard C library, but a lot of them seem *
  * to be broken.                                                       */
 
 /**/
 mod_export zlong
-zstrtol(const char *s, char **t, int base)
+zstrtol_underscore(const char *s, char **t, int base, int underscore)
 {
     const char *inp, *trunc = NULL;
     zulong calc = 0, newcalc = 0;
@@ -2062,22 +2069,24 @@ zstrtol(const char *s, char **t, int bas
     if (base < 2 || base > 36) {
 	zerr("invalid base (must be 2 to 36 inclusive): %d", base);
 	return (zlong)0;
-    } else if (base <= 10)
-	for (; *s >= '0' && *s < ('0' + base); s++) {
-	    if (trunc)
+    } else if (base <= 10) {
+	for (; (*s >= '0' && *s < ('0' + base)) ||
+		 (underscore && *s == '_'); s++) {
+	    if (trunc || *s == '_')
 		continue;
 	    newcalc = calc * base + *s - '0';
 	    if (newcalc < calc)
 	    {
-	      trunc = s;
-	      continue;
+		trunc = s;
+		continue;
 	    }
 	    calc = newcalc;
 	}
-    else
+    } else {
 	for (; idigit(*s) || (*s >= 'a' && *s < ('a' + base - 10))
-	     || (*s >= 'A' && *s < ('A' + base - 10)); s++) {
-	    if (trunc)
+	     || (*s >= 'A' && *s < ('A' + base - 10))
+	     || (underscore && *s == '_'); s++) {
+	    if (trunc || *s == '_')
 		continue;
 	    newcalc = calc*base + (idigit(*s) ? (*s - '0') : (*s & 0x1f) + 9);
 	    if (newcalc < calc)
@@ -2087,6 +2096,7 @@ zstrtol(const char *s, char **t, int bas
 	    }
 	    calc = newcalc;
 	}
+    }
 
     /*
      * Special case: check for a number that was just too long for

-- 
Peter Stephenson <pws@xxxxxxx>            Software Engineer
Tel: +44 (0)1223 692070                   Cambridge Silicon Radio Limited
Churchill House, Cambridge Business Park, Cowley Road, Cambridge, CB4 0WZ, UK


Member of the CSR plc group of companies. CSR plc registered in England and Wales, registered number 4187346, registered office Churchill House, Cambridge Business Park, Cowley Road, Cambridge, CB4 0WZ, United Kingdom
More information can be found at www.csr.com. Follow CSR on Twitter at http://twitter.com/CSR_PLC and read our blog at www.csr.com/blog



Messages sorted by: Reverse Date, Date, Thread, Author