Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

[PATCH 1/2] Consolidated nofork substitution changes



Because I've fixed some additional bugs, added new tests, and rebased
onto the recent master branch, I'm reposting the entire set of nofork
(mksh ${|...} and ksh ${ ... }) substitution diffs rather than doing
another incremental patch.  To keep the individual patches to a more
reasonable size, I've split the C code and doc from the tests.

These diffs now rely on the change to input.c posted in workers/52153
to properly handle trailing newlines in the output captured by ${ ...
}.
diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index f87832e75..5be40bf25 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -1881,23 +1881,55 @@ sect(Command Substitution)
 cindex(command substitution)
 cindex(substitution, command)
 A command enclosed in parentheses preceded by a dollar sign, like
-`tt($LPAR())...tt(RPAR())', or quoted with grave
-accents, like `tt(`)...tt(`)', is replaced with its standard output, with
-any trailing newlines deleted.
-If the substitution is not enclosed in double quotes, the
-output is broken into words using the tt(IFS) parameter.
+`tt($LPAR())...tt(RPAR())', or quoted with grave accents, like
+`tt(`)...tt(`)', is executed in a subshell and replaced by its
+standard output, with any trailing newlines deleted.  If the
+substitution is not enclosed in double quotes, the output is broken
+into words using the tt(IFS) parameter.
 vindex(IFS, use of)
 
 The substitution `tt($LPAR()cat) var(foo)tt(RPAR())' may be replaced
 by the faster `tt($LPAR()<)var(foo)tt(RPAR())'.  In this case var(foo)
 undergoes single word shell expansions (em(parameter expansion),
 em(command substitution) and em(arithmetic expansion)), but not
-filename generation.
+filename generation.  No subshell is created.
 
 If the option tt(GLOB_SUBST) is set, the result of any unquoted command
 substitution, including the special form just mentioned, is eligible for
 filename generation.
 
+A command with a leading pipe character, enclosed in braces prefixed by
+a dollar sign, as in `tt(${|)...tt(})', is executed in the current shell
+context, rather than in a subshell, and is replaced by the value of the
+parameter tt(REPLY) at the end of the command.  There em(must not) be
+any whitespace between the opening brace and the pipe character.  Any
+prior value of tt($REPLY) is saved and restored around this substitution,
+in the manner of a function local parameter.  Other parameters declared
+within the substitution also behave as locals, as if in a function,
+unless `tt(typeset -g)' is used.  Trailing newlines are em(not) deleted
+from the final replacement in this case, and it is subject to filename
+generation in the same way as `tt($LPAR())...tt(RPAR())' but is em(not)
+split on tt(IFS) unless the tt(SH_WORD_SPLIT) option is set.
+
+Substitutions of the form `tt(${|)var(param)tt(|)...tt(})' are similar,
+except that the substitution is replaced by the value of the parameter
+named by var(param).  No implicit save or restore applies to var(param)
+except as noted for tt(REPLY), and var(param) should em(not) be declared
+within the command.  If var(param) names an array, array expansion rules
+apply.
+
+A command enclosed in braces preceded by a dollar sign, and set off from
+the braces by whitespace, like `tt(${ )...tt( })', is replaced by its
+standard output.  Like `tt(${|)...tt(})' and unlike
+`tt($LPAR())...tt(RPAR())', the command executes in the current shell
+context with function local behaviors and does not create a subshell.
+
+Note that because the `tt(${|)...tt(})' and `tt(${ )...tt( })' forms
+must be parsed at once as both string tokens and commands, all other
+braces (`tt({)' or `tt(})') within the command either must be quoted,
+or must appear in syntactically valid pairs, such as around complex
+commands, function bodies, or parameter references.
+
 texinode(Arithmetic Expansion)(Brace Expansion)(Command Substitution)(Expansion)
 sect(Arithmetic Expansion)
 cindex(arithmetic expansion)
diff --git a/Src/lex.c b/Src/lex.c
index 2f7937410..33b17cc95 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -937,7 +937,7 @@ static enum lextok
 gettokstr(int c, int sub)
 {
     int bct = 0, pct = 0, brct = 0, seen_brct = 0, fdpar = 0;
-    int intpos = 1, in_brace_param = 0;
+    int intpos = 1, in_brace_param = 0, cmdsubst = 0;
     int inquote, unmatched = 0;
     enum lextok peek;
 #ifdef DEBUG
@@ -1135,7 +1135,7 @@ gettokstr(int c, int sub)
 	    c = Inpar;
 	    break;
 	case LX2_INBRACE:
-	    if (isset(IGNOREBRACES) || sub)
+	    if ((isset(IGNOREBRACES) && !cmdsubst) || sub)
 		c = '{';
 	    else {
 		if (!lexbuf.len && incmdpos) {
@@ -1157,8 +1157,11 @@ gettokstr(int c, int sub)
 	    if (in_brace_param) {
 		cmdpop();
 	    }
-	    if (bct-- == in_brace_param)
-		in_brace_param = 0;
+	    if (bct-- == in_brace_param) {
+		if (cmdsubst)
+		    cmdpop();
+		in_brace_param = cmdsubst = 0;
+	    }
 	    c = Outbrace;
 	    break;
 	case LX2_COMMA:
@@ -1405,16 +1408,24 @@ gettokstr(int c, int sub)
        }
        add(c);
        c = hgetc();
-	if (intpos)
+       if (intpos)
 	    intpos--;
-	if (lexstop)
+       if (lexstop)
 	    break;
+       if (!cmdsubst && in_brace_param && act == LX2_STRING &&
+	   (c == '|' || c == Bar || inblank(c))) {
+	   cmdsubst = in_brace_param;
+	   cmdpush(CS_CURSH);
+       }
     }
   brk:
     if (errflag) {
 	if (in_brace_param) {
-	    while(bct-- >= in_brace_param)
+	    while(bct >= in_brace_param) {
+		if (bct-- == cmdsubst)
+		    cmdpop();
 		cmdpop();
+	    }
 	}
 	return LEXERR;
     }
@@ -1422,8 +1433,11 @@ gettokstr(int c, int sub)
     if (unmatched && !(lexflags & LEXFLAGS_ACTIVE))
 	zerr("unmatched %c", unmatched);
     if (in_brace_param) {
-	while(bct-- >= in_brace_param)
+	while(bct >= in_brace_param) {
+	    if (bct-- == cmdsubst)
+		cmdpop();
 	    cmdpop();
+	}
 	zerr("closing brace expected");
     } else if (unset(IGNOREBRACES) && !sub && lexbuf.len > 1 &&
 	       peek == STRING && lexbuf.ptr[-1] == '}' &&
@@ -1459,8 +1473,8 @@ gettokstr(int c, int sub)
 static int
 dquote_parse(char endchar, int sub)
 {
-    int pct = 0, brct = 0, bct = 0, intick = 0, err = 0;
-    int c;
+    int pct = 0, brct = 0, bct = 0, intick = 0, err = 0, cmdsubst = 0;
+    int c, bskip = 0;
     int math = endchar == ')' || endchar == ']' || infor;
     int zlemath = math && zlemetacs > zlemetall + addedx - inbufct;
 
@@ -1529,11 +1543,25 @@ dquote_parse(char endchar, int sub)
 		c = Qstring;
 	    }
 	    break;
+	case '{':
+	    if (cmdsubst && !intick) {
+		/* In nofork substitution, tokenize as if unquoted */
+		c = Inbrace;
+		bskip++;
+	    }
+	    break;
 	case '}':
 	    if (intick || !bct)
 		break;
 	    c = Outbrace;
-	    bct--;
+	    if (bskip) {
+		bskip--;
+		break;
+	    }
+	    if (bct-- == cmdsubst) {
+		cmdsubst = 0;
+		cmdpop();
+	    }
 	    cmdpop();
 	    break;
 	case '`':
@@ -1588,14 +1616,34 @@ dquote_parse(char endchar, int sub)
 	if (err || lexstop)
 	    break;
 	add(c);
+	if (!cmdsubst && c == Inbrace) {
+	    /* Check for ${|...} or ${ ... } nofork command substitution */
+	    if ((c = hgetc()) && !lexstop) {
+		if (c == '|' || inblank(c)) {
+		    cmdsubst = bct;
+		    cmdpush(CS_CURSH);
+		}
+		hungetc(c);
+	    }
+	}
     }
     if (intick == 2)
 	ALLOWHIST
     if (intick) {
 	cmdpop();
     }
-    while (bct--)
+    while (bct) {
+	if (bct-- == cmdsubst) {
+	    /*
+	     * You would think this is an error, but if we call it one,
+	     * parsestrnoerr() returns nonzero to subst_parse_str() and
+	     * subsequently "bad substitution" is not reported
+	     */
+	    /* err = 1 */
+	    cmdpop();
+	}
 	cmdpop();
+    }
     if (lexstop)
 	err = intick || endchar || err;
     else if (err == 1) {
diff --git a/Src/subst.c b/Src/subst.c
index d68159227..52afd6484 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -1867,6 +1867,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
      * joining the array into a string (for compatibility with ksh/bash).
      */
     int quoted_array_with_offset = 0;
+    /* Indicates ${|...;} */
+    char *rplyvar = NULL;
+    /* Indicates ${ ... ;} */
+    char *rplytmp = NULL;
 
     *s++ = '\0';
     /*
@@ -1894,8 +1898,147 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
      * flags in parentheses, but also one ksh hack.
      */
     if (c == Inbrace) {
+	/* The command string to be run by ${|...;} */
+	char *cmdarg = NULL;
+	size_t slen = 0;
 	inbrace = 1;
 	s++;
+
+        /* Short-path for the nofork command substitution ${|cmd;}
+	 * See other comments about kludges for why this is here.
+	 *
+         * The command string is extracted and executed, and the
+         * substitution assigned. There's no (...)-flags processing,
+         * i.e. no ${|(U)cmd;}, because it looks quite awful and
+         * should not be part of command substitution in any case.
+         * Use ${(U)${|cmd;}} as you would for ${(U)$(cmd;)}.
+	 */
+	if (*s == '|' || *s == Bar || inblank(*s)) {
+	    char *outbracep = s;
+	    char sav = *s;
+	    *s = Inbrace;
+	    if (skipparens(Inbrace, Outbrace, &outbracep) == 0) {
+		slen = outbracep - s - 1;
+		if ((*s = sav) != Bar) {
+		    sav = *outbracep;
+		    *outbracep = '\0';
+		    tokenize(s);
+		    *outbracep = sav;
+		}
+	    }
+	}
+	if (slen > 1) {
+	    char *outbracep = s + slen;
+	    if (*outbracep == Outbrace) {
+		if ((rplyvar = itype_end(s+1, INAMESPC, 0))) {
+		    if (*rplyvar == Inbrack &&
+			(rplyvar = parse_subscript(++rplyvar, 1, ']')))
+			++rplyvar;
+		}
+		if (rplyvar == s+1 && *rplyvar == Bar) {
+		    /* Is ${||...} a substitution error or a syntax error?
+		    zerr("bad substitution");
+		    return NULL;
+		    */
+		    rplyvar = NULL;
+		}
+		if (rplyvar && *rplyvar == Bar) {
+		    cmdarg = dupstrpfx(rplyvar+1, outbracep-rplyvar-1);
+		    rplyvar = dupstrpfx(s+1,rplyvar-s-1);
+		} else {
+		    cmdarg = dupstrpfx(s+1, outbracep-s-1);
+		    rplyvar = "REPLY";
+		}
+		if (inblank(*s)) {
+		    /*
+		     * Admittedly a hack.  Take advantage of the enforced
+		     * locality of REPLY and the semantics of $(<file) to
+		     * construct a command to write/read a temporary file.
+		     * Then fall through to the regular handling of $REPLY
+		     * to manage word splitting, expansion flags, etc.
+		     */
+		    char *outfmt = ">| %s { %s ;}";	/* 13 */
+		    if ((rplytmp = gettempname(NULL, 1))) {
+			/* Prevent shenanigans with $TMPPREFIX */
+			char *tmpfile = quotestring(rplytmp, QT_BACKSLASH);
+			char *dummy = zhalloc(strlen(cmdarg) +
+					      strlen(tmpfile) +
+					      13);
+			sprintf(dummy, outfmt, tmpfile, cmdarg);
+			cmdarg = dummy;
+		    } else {
+			/* TMPPREFIX not writable? */
+			cmdoutval = lastval;
+			cmdarg = NULL;
+		    }
+		}
+		s = outbracep;
+	    }
+	}
+
+	if (rplyvar) {
+	    Param pm;
+	    /* char *rplyval = getsparam("REPLY"); */
+	    startparamscope(); /* "local" behaves as if in a function */
+	    pm = createparam("REPLY", PM_LOCAL|PM_UNSET);
+	    if (pm)	/* Shouldn't createparam() do this? */
+		pm->level = locallevel;
+	    /* if (rplyval) setsparam("REPLY", ztrdup(rplyval)); */
+	}
+
+	if (rplyvar && cmdarg && *cmdarg) {
+	    int obreaks = breaks;
+	    Eprog cmdprog;
+	    /* Execute the shell command */
+	    untokenize(cmdarg);
+	    cmdprog = parse_string(cmdarg, 0);
+	    if (cmdprog) {
+		execode(cmdprog, 1, 0, "cmdsubst");
+		cmdoutval = lastval;
+		/* "return" behaves as if in a function */
+		if (retflag) {
+		    retflag = 0;
+		    breaks = obreaks;	/* Is this ever not zero? */
+		}
+	    } else	/* parse error */
+		errflag |= ERRFLAG_ERROR;
+	    if (rplytmp && !errflag) {
+		int onoerrs = noerrs;
+		noerrs = 2;
+		if ((cmdarg = ztuff(rplytmp)))
+		    setsparam("REPLY", cmdarg);
+		noerrs = onoerrs;
+	    }
+	}
+
+	if (rplytmp)
+	    unlink(rplytmp);
+	if (rplyvar) {
+	    if (strcmp(rplyvar, "REPLY") == 0) {
+		if ((val = dupstring(getsparam("REPLY"))))
+		    vunset = 0;
+		else {
+		    vunset = 1;
+		    val = dupstring("");
+		}
+	    } else {
+		s = dyncat(rplyvar, s);
+		rplyvar = NULL;
+	    }
+	    endparamscope();
+	    if (exit_pending) {
+		if (mypid == getpid()) {
+		    /*
+		     * paranoia: don't check for jobs, but there
+		     * shouldn't be any if not interactive.
+		     */
+		    stopmsg = 1;
+		    zexit(exit_val, ZEXIT_NORMAL);
+		} else
+		    _exit(exit_val);
+	    }
+	}
+
 	/*
 	 * In ksh emulation a leading `!' is a special flag working
 	 * sort of like our (k).  This is true only for arrays or
@@ -2590,14 +2733,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
 	 * we let fetchvalue set the main string pointer s to
 	 * the end of the bit it's fetched.
 	 */
-	if (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s),
-			     (wantt ? -1 :
-			      ((unset(KSHARRAYS) || inbrace) ? 1 : -1)),
-			     scanflags)) ||
-	    (v->pm && (v->pm->node.flags & PM_UNSET)) ||
-	    (v->flags & VALFLAG_EMPTY))
+	if (!rplyvar &&
+	    (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s),
+			      (wantt ? -1 :
+			       ((unset(KSHARRAYS) || inbrace) ? 1 : -1)),
+			      scanflags)) ||
+	     (v->pm && (v->pm->node.flags & PM_UNSET)) ||
+	     (v->flags & VALFLAG_EMPTY)))
 	    vunset = 1;
-
 	if (wantt) {
 	    /*
 	     * Handle the (t) flag: value now becomes the type


Messages sorted by: Reverse Date, Date, Thread, Author