Zsh Mailing List Archive
Messages sorted by:
Reverse Date,
Date,
Thread,
Author
[PATCH 1/2] Consolidated nofork substitution changes
- X-seq: zsh-workers 52154
- From: Bart Schaefer <schaefer@xxxxxxxxxxxxxxxx>
- To: Zsh hackers list <zsh-workers@xxxxxxx>
- Subject: [PATCH 1/2] Consolidated nofork substitution changes
- Date: Sat, 16 Sep 2023 15:20:30 -0700
- Archived-at: <https://zsh.org/workers/52154>
- List-id: <zsh-workers.zsh.org>
Because I've fixed some additional bugs, added new tests, and rebased
on the recent master branch, I'm reposting the entire set of nofork (mksh
${|...} and ksh ${ ... }) substitution diffs rather than posting another
incremental patch. To keep the individual patches to a more reasonable
size, I've split the C code and doc from the tests.
These diffs now rely on the change to input.c posted in workers/52153
to properly handle trailing newlines in the output captured by ${ ...
}.
diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index f87832e75..5be40bf25 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -1881,23 +1881,55 @@ sect(Command Substitution)
cindex(command substitution)
cindex(substitution, command)
A command enclosed in parentheses preceded by a dollar sign, like
-`tt($LPAR())...tt(RPAR())', or quoted with grave
-accents, like `tt(`)...tt(`)', is replaced with its standard output, with
-any trailing newlines deleted.
-If the substitution is not enclosed in double quotes, the
-output is broken into words using the tt(IFS) parameter.
+`tt($LPAR())...tt(RPAR())', or quoted with grave accents, like
+`tt(`)...tt(`)', is executed in a subshell and replaced by its
+standard output, with any trailing newlines deleted. If the
+substitution is not enclosed in double quotes, the output is broken
+into words using the tt(IFS) parameter.
vindex(IFS, use of)
The substitution `tt($LPAR()cat) var(foo)tt(RPAR())' may be replaced
by the faster `tt($LPAR()<)var(foo)tt(RPAR())'. In this case var(foo)
undergoes single word shell expansions (em(parameter expansion),
em(command substitution) and em(arithmetic expansion)), but not
-filename generation.
+filename generation. No subshell is created.
If the option tt(GLOB_SUBST) is set, the result of any unquoted command
substitution, including the special form just mentioned, is eligible for
filename generation.
+A command with a leading pipe character, enclosed in braces prefixed by
+a dollar sign, as in `tt(${|)...tt(})', is executed in the current shell
+context, rather than in a subshell, and is replaced by the value of the
+parameter tt(REPLY) at the end of the command. There em(must not) be
+any whitespace between the opening brace and the pipe character. Any
+prior value of tt($REPLY) is saved and restored around this substitution,
+in the manner of a function local parameter. Other parameters declared
+within the substitution also behave as locals, as if in a function,
+unless `tt(typeset -g)' is used. Trailing newlines are em(not) deleted
+from the final replacement in this case, and it is subject to filename
+generation in the same way as `tt($LPAR())...tt(RPAR())' but is em(not)
+split on tt(IFS) unless the tt(SH_WORD_SPLIT) option is set.
+
+Substitutions of the form `tt(${|)var(param)tt(|)...tt(})' are similar,
+except that the substitution is replaced by the value of the parameter
+named by var(param). No implicit save or restore applies to var(param)
+except as noted for tt(REPLY), and var(param) should em(not) be declared
+within the command. If var(param) names an array, array expansion rules
+apply.
+
+A command enclosed in braces preceded by a dollar sign, and set off from
+the braces by whitespace, like `tt(${ )...tt( })', is replaced by its
+standard output. Like `tt(${|)...tt(})' and unlike
+`tt($LPAR())...tt(RPAR())', the command executes in the current shell
+context with function local behaviors and does not create a subshell.
+
+Note that because the `tt(${|)...tt(})' and `tt(${ )...tt( })' forms
+must be parsed at once as both string tokens and commands, all other
+braces (`tt({)' or `tt(})') within the command either must be quoted,
+or must appear in syntactically valid pairs, such as around complex
+commands, function bodies, or parameter references.
+
texinode(Arithmetic Expansion)(Brace Expansion)(Command Substitution)(Expansion)
sect(Arithmetic Expansion)
cindex(arithmetic expansion)
diff --git a/Src/lex.c b/Src/lex.c
index 2f7937410..33b17cc95 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -937,7 +937,7 @@ static enum lextok
gettokstr(int c, int sub)
{
int bct = 0, pct = 0, brct = 0, seen_brct = 0, fdpar = 0;
- int intpos = 1, in_brace_param = 0;
+ int intpos = 1, in_brace_param = 0, cmdsubst = 0;
int inquote, unmatched = 0;
enum lextok peek;
#ifdef DEBUG
@@ -1135,7 +1135,7 @@ gettokstr(int c, int sub)
c = Inpar;
break;
case LX2_INBRACE:
- if (isset(IGNOREBRACES) || sub)
+ if ((isset(IGNOREBRACES) && !cmdsubst) || sub)
c = '{';
else {
if (!lexbuf.len && incmdpos) {
@@ -1157,8 +1157,11 @@ gettokstr(int c, int sub)
if (in_brace_param) {
cmdpop();
}
- if (bct-- == in_brace_param)
- in_brace_param = 0;
+ if (bct-- == in_brace_param) {
+ if (cmdsubst)
+ cmdpop();
+ in_brace_param = cmdsubst = 0;
+ }
c = Outbrace;
break;
case LX2_COMMA:
@@ -1405,16 +1408,24 @@ gettokstr(int c, int sub)
}
add(c);
c = hgetc();
- if (intpos)
+ if (intpos)
intpos--;
- if (lexstop)
+ if (lexstop)
break;
+ if (!cmdsubst && in_brace_param && act == LX2_STRING &&
+ (c == '|' || c == Bar || inblank(c))) {
+ cmdsubst = in_brace_param;
+ cmdpush(CS_CURSH);
+ }
}
brk:
if (errflag) {
if (in_brace_param) {
- while(bct-- >= in_brace_param)
+ while(bct >= in_brace_param) {
+ if (bct-- == cmdsubst)
+ cmdpop();
cmdpop();
+ }
}
return LEXERR;
}
@@ -1422,8 +1433,11 @@ gettokstr(int c, int sub)
if (unmatched && !(lexflags & LEXFLAGS_ACTIVE))
zerr("unmatched %c", unmatched);
if (in_brace_param) {
- while(bct-- >= in_brace_param)
+ while(bct >= in_brace_param) {
+ if (bct-- == cmdsubst)
+ cmdpop();
cmdpop();
+ }
zerr("closing brace expected");
} else if (unset(IGNOREBRACES) && !sub && lexbuf.len > 1 &&
peek == STRING && lexbuf.ptr[-1] == '}' &&
@@ -1459,8 +1473,8 @@ gettokstr(int c, int sub)
static int
dquote_parse(char endchar, int sub)
{
- int pct = 0, brct = 0, bct = 0, intick = 0, err = 0;
- int c;
+ int pct = 0, brct = 0, bct = 0, intick = 0, err = 0, cmdsubst = 0;
+ int c, bskip = 0;
int math = endchar == ')' || endchar == ']' || infor;
int zlemath = math && zlemetacs > zlemetall + addedx - inbufct;
@@ -1529,11 +1543,25 @@ dquote_parse(char endchar, int sub)
c = Qstring;
}
break;
+ case '{':
+ if (cmdsubst && !intick) {
+ /* In nofork substitution, tokenize as if unquoted */
+ c = Inbrace;
+ bskip++;
+ }
+ break;
case '}':
if (intick || !bct)
break;
c = Outbrace;
- bct--;
+ if (bskip) {
+ bskip--;
+ break;
+ }
+ if (bct-- == cmdsubst) {
+ cmdsubst = 0;
+ cmdpop();
+ }
cmdpop();
break;
case '`':
@@ -1588,14 +1616,34 @@ dquote_parse(char endchar, int sub)
if (err || lexstop)
break;
add(c);
+ if (!cmdsubst && c == Inbrace) {
+ /* Check for ${|...} nofork command substitution */
+ if ((c = hgetc()) && !lexstop) {
+ if (c == '|' || inblank(c)) {
+ cmdsubst = bct;
+ cmdpush(CS_CURSH);
+ }
+ hungetc(c);
+ }
+ }
}
if (intick == 2)
ALLOWHIST
if (intick) {
cmdpop();
}
- while (bct--)
+ while (bct) {
+ if (bct-- == cmdsubst) {
+ /*
+ * You would think this is an error, but if we call it one,
+ * parsestrnoerr() returns nonzero to subst_parse_str() and
+ * subsequently "bad substitution" is not reported
+ */
+ /* err = 1 */
+ cmdpop();
+ }
cmdpop();
+ }
if (lexstop)
err = intick || endchar || err;
else if (err == 1) {
diff --git a/Src/subst.c b/Src/subst.c
index d68159227..52afd6484 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -1867,6 +1867,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
* joining the array into a string (for compatibility with ksh/bash).
*/
int quoted_array_with_offset = 0;
+ /* Indicates ${|...;} */
+ char *rplyvar = NULL;
+ /* Indicates ${ ... ;} */
+ char *rplytmp = NULL;
*s++ = '\0';
/*
@@ -1894,8 +1898,147 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
* flags in parentheses, but also one ksh hack.
*/
if (c == Inbrace) {
+ /* The command string to be run by ${|...;} */
+ char *cmdarg = NULL;
+ size_t slen = 0;
inbrace = 1;
s++;
+
+ /* Short-path for the nofork command substitution ${|cmd;}
+ * See other comments about kludges for why this is here.
+ *
+ * The command string is extracted and executed, and the
+ * substitution assigned. There's no (...)-flags processing,
+ * i.e. no ${|(U)cmd;}, because it looks quite awful and
+ * should not be part of command substitution in any case.
+ * Use ${(U)${|cmd;}} as you would for ${(U)$(cmd;)}.
+ */
+ if (*s == '|' || *s == Bar || inblank(*s)) {
+ char *outbracep = s;
+ char sav = *s;
+ *s = Inbrace;
+ if (skipparens(Inbrace, Outbrace, &outbracep) == 0) {
+ slen = outbracep - s - 1;
+ if ((*s = sav) != Bar) {
+ sav = *outbracep;
+ *outbracep = '\0';
+ tokenize(s);
+ *outbracep = sav;
+ }
+ }
+ }
+ if (slen > 1) {
+ char *outbracep = s + slen;
+ if (*outbracep == Outbrace) {
+ if ((rplyvar = itype_end(s+1, INAMESPC, 0))) {
+ if (*rplyvar == Inbrack &&
+ (rplyvar = parse_subscript(++rplyvar, 1, ']')))
+ ++rplyvar;
+ }
+ if (rplyvar == s+1 && *rplyvar == Bar) {
+ /* Is ${||...} a substitution error or a syntax error?
+ zerr("bad substitution");
+ return NULL;
+ */
+ rplyvar = NULL;
+ }
+ if (rplyvar && *rplyvar == Bar) {
+ cmdarg = dupstrpfx(rplyvar+1, outbracep-rplyvar-1);
+ rplyvar = dupstrpfx(s+1,rplyvar-s-1);
+ } else {
+ cmdarg = dupstrpfx(s+1, outbracep-s-1);
+ rplyvar = "REPLY";
+ }
+ if (inblank(*s)) {
+ /*
+ * Admittedly a hack. Take advantage of the enforced
+ * locality of REPLY and the semantics of $(<file) to
+ * construct a command to write/read a temporary file.
+ * Then fall through to the regular handling of $REPLY
+ * to manage word splitting, expansion flags, etc.
+ */
+ char *outfmt = ">| %s { %s ;}"; /* 13 */
+ if ((rplytmp = gettempname(NULL, 1))) {
+ /* Prevent shenanigans with $TMPPREFIX */
+ char *tmpfile = quotestring(rplytmp, QT_BACKSLASH);
+ char *dummy = zhalloc(strlen(cmdarg) +
+ strlen(tmpfile) +
+ 13);
+ sprintf(dummy, outfmt, tmpfile, cmdarg);
+ cmdarg = dummy;
+ } else {
+ /* TMPPREFIX not writable? */
+ cmdoutval = lastval;
+ cmdarg = NULL;
+ }
+ }
+ s = outbracep;
+ }
+ }
+
+ if (rplyvar) {
+ Param pm;
+ /* char *rplyval = getsparam("REPLY"); */
+ startparamscope(); /* "local" behaves as if in a function */
+ pm = createparam("REPLY", PM_LOCAL|PM_UNSET);
+ if (pm) /* Shouldn't createparam() do this? */
+ pm->level = locallevel;
+ /* if (rplyval) setsparam("REPLY", ztrdup(rplyval)); */
+ }
+
+ if (rplyvar && cmdarg && *cmdarg) {
+ int obreaks = breaks;
+ Eprog cmdprog;
+ /* Execute the shell command */
+ untokenize(cmdarg);
+ cmdprog = parse_string(cmdarg, 0);
+ if (cmdprog) {
+ execode(cmdprog, 1, 0, "cmdsubst");
+ cmdoutval = lastval;
+ /* "return" behaves as if in a function */
+ if (retflag) {
+ retflag = 0;
+ breaks = obreaks; /* Is this ever not zero? */
+ }
+ } else /* parse error */
+ errflag |= ERRFLAG_ERROR;
+ if (rplytmp && !errflag) {
+ int onoerrs = noerrs;
+ noerrs = 2;
+ if ((cmdarg = ztuff(rplytmp)))
+ setsparam("REPLY", cmdarg);
+ noerrs = onoerrs;
+ }
+ }
+
+ if (rplytmp)
+ unlink(rplytmp);
+ if (rplyvar) {
+ if (strcmp(rplyvar, "REPLY") == 0) {
+ if ((val = dupstring(getsparam("REPLY"))))
+ vunset = 0;
+ else {
+ vunset = 1;
+ val = dupstring("");
+ }
+ } else {
+ s = dyncat(rplyvar, s);
+ rplyvar = NULL;
+ }
+ endparamscope();
+ if (exit_pending) {
+ if (mypid == getpid()) {
+ /*
+ * paranoia: don't check for jobs, but there
+ * shouldn't be any if not interactive.
+ */
+ stopmsg = 1;
+ zexit(exit_val, ZEXIT_NORMAL);
+ } else
+ _exit(exit_val);
+ }
+ }
+
/*
* In ksh emulation a leading `!' is a special flag working
* sort of like our (k). This is true only for arrays or
@@ -2590,14 +2733,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
* we let fetchvalue set the main string pointer s to
* the end of the bit it's fetched.
*/
- if (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s),
- (wantt ? -1 :
- ((unset(KSHARRAYS) || inbrace) ? 1 : -1)),
- scanflags)) ||
- (v->pm && (v->pm->node.flags & PM_UNSET)) ||
- (v->flags & VALFLAG_EMPTY))
+ if (!rplyvar &&
+ (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s),
+ (wantt ? -1 :
+ ((unset(KSHARRAYS) || inbrace) ? 1 : -1)),
+ scanflags)) ||
+ (v->pm && (v->pm->node.flags & PM_UNSET)) ||
+ (v->flags & VALFLAG_EMPTY)))
vunset = 1;
-
if (wantt) {
/*
* Handle the (t) flag: value now becomes the type
Messages sorted by:
Reverse Date,
Date,
Thread,
Author