Zsh Mailing List Archive
Messages sorted by:
Reverse Date,
Date,
Thread,
Author
PATCH: parse from even deeper in hell
- X-seq: zsh-workers 34570
- From: Peter Stephenson <p.stephenson@xxxxxxxxxxx>
- To: Zsh Hackers' List <zsh-workers@xxxxxxx>
- Subject: PATCH: parse from even deeper in hell
- Date: Thu, 19 Feb 2015 10:13:15 +0000
- List-help: <mailto:zsh-workers-help@zsh.org>
- List-id: Zsh Workers List <zsh-workers.zsh.org>
- List-post: <mailto:zsh-workers@zsh.org>
- Mailing-list: contact zsh-workers-help@xxxxxxx; run by ezmlm
- Organization: Samsung Cambridge Solution Centre
  And Tomlinson looked down and down, and saw beneath his feet
  The frontlet of a tortured star milk-white in Hell-Mouth heat.
% print $((echo one); (echo two))
zsh: bad math expression: operator expected at `one); (ech...'
At the point this goes wrong, we've actually already established that this
is a command substitution, not a math expression.  However, we're now in
the substitution code, which has no marker recording that decision.
Instead, it just looks to see if there are two parentheses at the end,
which there are.
Note that it's not a fix to count active parentheses in the middle at
that point: those aren't tokenized because we're parsing this as a
string for later expansion.  So the ones at the end are the first that
skipparens() picks up.  In any case re-counting when we've already
established what's supposed to happen is a pretty kludgy fix.
The fix here is to use different tokens for the first and last
parenthesis for math.  We then just look for the matching close marker
when we find the open marker.  We can't have nested math expansions so I
think this ought to be robust.
I've incremented the version as this changes the way strings are
tokenized.
The tests might more logically sit with command substitution rather than
arithmetic, but I've left them where they are so that the tests for what
is and isn't arithmetic stay in one place for easy comparison.
pws
diff --git a/Config/version.mk b/Config/version.mk
index eb51638..a8eafa5 100644
--- a/Config/version.mk
+++ b/Config/version.mk
@@ -27,5 +27,5 @@
 # This must also serve as a shell script, so do not add spaces around the
 # `=' signs.
 
-VERSION=5.0.7-dev-0
-VERSION_DATE='October 8, 2014'
+VERSION=5.0.7-dev-1
+VERSION_DATE='February 19, 2014'
diff --git a/Src/lex.c b/Src/lex.c
index 0068485..307b6e9 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -35,7 +35,7 @@
 /* tokens */
 
 /**/
-mod_export char ztokens[] = "#$^*()$=|{}[]`<>>?~`,'\"\\\\";
+mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,'\"\\\\";
 
 /* parts of the current token */
 
@@ -473,8 +473,14 @@ add(int c)
 	}							      \
     }
 
+enum {
+    CMD_OR_MATH_CMD,
+    CMD_OR_MATH_MATH,
+    CMD_OR_MATH_ERR
+};
+
 /*
- * Return 1 for math, 0 for a command, 2 for an error.  If it couldn't be
+ * Return one of the above.  If it couldn't be
  * parsed as math, but there was no gross error, it's a command.
  */
 
@@ -496,13 +502,13 @@ cmd_or_math(int cs_type)
 	/* Successfully parsed, see if it was math */
 	c = hgetc();
 	if (c == ')')
-	    return 1; /* yes */
+	    return CMD_OR_MATH_MATH; /* yes */
 	hungetc(c);
 	lexstop = 0;
 	c = ')';
     } else if (lexstop) {
 	/* we haven't got anything to unget */
-	return 2;
+	return CMD_OR_MATH_ERR;
     }
     /* else unsuccessful: unget the whole thing */
     hungetc(c);
@@ -513,15 +519,15 @@ cmd_or_math(int cs_type)
 		ztokens[*lexbuf.ptr - Pound] : *lexbuf.ptr);
     }
     if (errflag)
-	return 2;
+	return CMD_OR_MATH_ERR;
     hungetc('(');
-    return errflag ? 2 : 0;
+    return errflag ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD;
 }
 
 
 /*
  * Parse either a $(( ... )) or a $(...)
- * Return 0 on success, 1 on failure.
+ * Return the same as cmd_or_math().
  */
 static int
 cmd_or_math_sub(void)
@@ -529,21 +535,23 @@ cmd_or_math_sub(void)
     int c = hgetc(), ret;
 
     if (c == '(') {
+	int lexpos = (int)(lexbuf.ptr - tokstr);
 	add(Inpar);
 	add('(');
-	if ((ret = cmd_or_math(CS_MATHSUBST)) == 1) {
+	if ((ret = cmd_or_math(CS_MATHSUBST)) == CMD_OR_MATH_MATH) {
+	    tokstr[lexpos] = Inparmath;
 	    add(')');
-	    return 0;
+	    return CMD_OR_MATH_MATH;
 	}
-	if (ret == 2)
-	    return 1;
+	if (ret == CMD_OR_MATH_ERR)
+	    return CMD_OR_MATH_ERR;
 	lexbuf.ptr -= 2;
 	lexbuf.len -= 2;
     } else {
 	hungetc(c);
 	lexstop = 0;
     }
-    return skipcomm();
+    return skipcomm() ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD;
 }
 
 /* Check whether we're looking at valid numeric globbing syntax      *
@@ -764,10 +772,10 @@ gettok(void)
 		lexbuf.ptr = tokstr = (char *)
 		    hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
 		switch (cmd_or_math(CS_MATH)) {
-		case 1:
+		case CMD_OR_MATH_MATH:
 		    return DINPAR;
 
-		case 0:
+		case CMD_OR_MATH_CMD:
 		    /*
 		     * Not math, so we don't return the contents
 		     * as a string in this case.
@@ -987,12 +995,19 @@ gettokstr(int c, int sub)
 		c = Outbrack;
 	    } else if (e == '(') {
 		add(String);
-		c = cmd_or_math_sub();
-		if (c) {
+		switch (cmd_or_math_sub()) {
+		case CMD_OR_MATH_CMD:
+		    c = Outpar;
+		    break;
+
+		case CMD_OR_MATH_MATH:
+		    c = Outparmath;
+		    break;
+
+		default:
 		    peek = LEXERR;
 		    goto brk;
 		}
-		c = Outpar;
 	    } else {
 		if (e == '{') {
 		    add(c);
@@ -1400,8 +1415,19 @@ dquote_parse(char endchar, int sub)
 	    c = hgetc();
 	    if (c == '(') {
 		add(Qstring);
-		err = cmd_or_math_sub();
-		c = Outpar;
+		switch (cmd_or_math_sub()) {
+		case CMD_OR_MATH_CMD:
+		    c = Outpar;
+		    break;
+
+		case CMD_OR_MATH_MATH:
+		    c = Outparmath;
+		    break;
+
+		default:
+		    err = 1;
+		    break;
+		}
 	    } else if (c == '[') {
 		add(String);
 		add(Inbrack);
diff --git a/Src/subst.c b/Src/subst.c
index a2bb648..056b12b 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -195,7 +195,7 @@ stringsubst(LinkList list, LinkNode node, int pf_flags, int asssub)
 
     while (!errflag && (c = *str)) {
 	if ((qt = c == Qstring) || c == String) {
-	    if ((c = str[1]) == Inpar) {
+	    if ((c = str[1]) == Inpar || c == Inparmath) {
 		if (!qt)
 		    list->list.flags |= LF_ARRAY;
 		str++;
@@ -258,6 +258,22 @@ stringsubst(LinkList list, LinkNode node, int pf_flags, int asssub)
 		skipparens(Inpar, Outpar, &str);
 #endif
 		str--;
+	    } else if (c == Inparmath) {
+		/* Math substitution of the form $((...)) */
+		str[-1] = '\0';
+		while (*str != Outparmath && *str)
+		    str++;
+		if (*str != Outparmath) {
+		    zerr("Failed to find end of math substitution");
+		    return NULL;
+		}
+		str[-1] = '\0';
+		if (isset(EXECOPT))
+		    str = arithsubst(str2 + 2, &str3, str+1);
+		else
+		    strncpy(str3, str2, 1);
+		setdata(node, (void *) str3);
+		continue;
 	    } else {
 		endchar = c;
 		*str = '\0';
@@ -266,16 +282,6 @@ stringsubst(LinkList list, LinkNode node, int pf_flags, int asssub)
 		    DPUTS(!*str, "BUG: parse error in command substitution");
 	    }
 	    *str++ = '\0';
-	    if (endchar == Outpar && str2[1] == '(' && str[-2] == ')') {
-		/* Math substitution of the form $((...)) */
-		str[-2] = '\0';
-		if (isset(EXECOPT))
-		    str = arithsubst(str2 + 2, &str3, str);
-		else
-		    strncpy(str3, str2, 1);
-		setdata(node, (void *) str3);
-		continue;
-	    }
 
 	    /* It is a command substitution, which will be parsed again   *
 	     * by the lexer, so we untokenize it first, but we cannot use *
diff --git a/Src/zsh.h b/Src/zsh.h
index dd946d2..9a97263 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -163,40 +163,42 @@ struct mathfunc {
 #define Hat		((char) 0x86)
 #define Star		((char) 0x87)
 #define Inpar		((char) 0x88)
-#define Outpar		((char) 0x89)
-#define Qstring	        ((char) 0x8a)
-#define Equals		((char) 0x8b)
-#define Bar	      	((char) 0x8c)
-#define Inbrace	        ((char) 0x8d)
-#define Outbrace	((char) 0x8e)
-#define Inbrack	        ((char) 0x8f)
-#define Outbrack	((char) 0x90)
-#define Tick		((char) 0x91)
-#define Inang		((char) 0x92)
-#define Outang		((char) 0x93)
-#define OutangProc	((char) 0x94)
-#define Quest		((char) 0x95)
-#define Tilde		((char) 0x96)
-#define Qtick		((char) 0x97)
-#define Comma		((char) 0x98)
+#define Inparmath	((char) 0x89)
+#define Outpar		((char) 0x8a)
+#define Outparmath	((char) 0x8b)
+#define Qstring	        ((char) 0x8c)
+#define Equals		((char) 0x8d)
+#define Bar	      	((char) 0x8e)
+#define Inbrace	        ((char) 0x8f)
+#define Outbrace	((char) 0x90)
+#define Inbrack	        ((char) 0x91)
+#define Outbrack	((char) 0x92)
+#define Tick		((char) 0x93)
+#define Inang		((char) 0x94)
+#define Outang		((char) 0x95)
+#define OutangProc	((char) 0x96)
+#define Quest		((char) 0x97)
+#define Tilde		((char) 0x98)
+#define Qtick		((char) 0x99)
+#define Comma		((char) 0x9a)
 /*
  * Null arguments: placeholders for single and double quotes
  * and backslashes.
  */
-#define Snull		((char) 0x99)
-#define Dnull		((char) 0x9a)
-#define Bnull		((char) 0x9b)
+#define Snull		((char) 0x9b)
+#define Dnull		((char) 0x9c)
+#define Bnull		((char) 0x9d)
 /*
  * Backslash which will be returned to "\" instead of being stripped
  * when we turn the string into a printable format.
  */
-#define Bnullkeep       ((char) 0x9c)
+#define Bnullkeep       ((char) 0x9e)
 /*
  * Null argument that does not correspond to any character.
  * This should be last as it does not appear in ztokens and
  * is used to initialise the IMETA type in inittyptab().
  */
-#define Nularg		((char) 0x9d)
+#define Nularg		((char) 0x9f)
 
 /*
  * Take care to update the use of IMETA appropriately when adding
diff --git a/Test/C01arith.ztst b/Test/C01arith.ztst
index 09c0822..67d78ee 100644
--- a/Test/C01arith.ztst
+++ b/Test/C01arith.ztst
@@ -353,3 +353,26 @@
   '
 0:Non-arithmetic subst with command subsitution parse from hell
 >yes, this one after case in subshell
+
+  print "a$((echo one subst)
+  (echo two subst))b"
+0:Another tricky case that is actually a command substitution
+>aone subst
+>two substb
+
+  print "x$((echo one frob); (echo two frob))y"
+0:Same on a single line
+>xone frob
+>two froby
+
+  # This case actually only works by accident: if it wasn't for the
+  # unbalanced parenthesis this would be a valid math substitution.
+  # Hence it's definitely not recommended code.  However, it does give
+  # the algorithm an extra check.
+  print $((case foo in
+  foo)
+  print Worked OK
+  ;;
+  esac))
+0:Would-be math expansion with extra parenthesis making it a cmd subst
+>Worked OK
Messages sorted by:
Reverse Date,
Date,
Thread,
Author