Zsh Mailing List Archive
Messages sorted by:
Reverse Date,
Date,
Thread,
Author
Re: a='foo"'; echo ${a/foo"/"bar} outputs bar
- X-seq: zsh-workers 52202
- From: Bart Schaefer <schaefer@xxxxxxxxxxxxxxxx>
- To: Zsh hackers list <zsh-workers@xxxxxxx>
- Subject: Re: a='foo"'; echo ${a/foo"/"bar} outputs bar
- Date: Tue, 3 Oct 2023 21:20:27 -0700
- Archived-at: <https://zsh.org/workers/52202>
- In-reply-to: <CAH+w=7aoQsn5BeiWG3+RmsgoUU0aQxsDCJX6=JOubbexPDxYzg@mail.gmail.com>
- List-id: <zsh-workers.zsh.org>
- References: <20221211175001.u7dqfrnnzylsbjan@chazelas.org> <CAH+w=7ZZwEpA9Wekz5wEQgkoDT0SQ6ht2VkmYgAYtx6zqoNs9g@mail.gmail.com> <CAH+w=7aoQsn5BeiWG3+RmsgoUU0aQxsDCJX6=JOubbexPDxYzg@mail.gmail.com>
On Sun, Aug 6, 2023 at 7:36 PM Bart Schaefer <schaefer@xxxxxxxxxxxxxxxx> wrote:
>
> Circling back to this ...
... and again ...
> With an increased understanding of lex.c gained by stepping through
> variations of ${|...} a thousand times, I think I have identified two
> possible ways to address this.
Turns out there's a fundamental problem with ${var/pat/repl}, namely
that upon reaching paramsubst() the only recognized quoting of the
slash character is to precede it with a backslash. Conversely,
gettokstr() recognizes '...', "...", and $'...' quoting, and may
return STRING tokens containing backslashes that paramsubst() won't be
able to deal with.
Other things that have never worked are e.g. ${a/${b/c/d}/e}, which
gives bad substitution, and ${a/"${b/c/d}"/e} which AFAICT always
substitutes ${a} with no replacements, and ${a/"$( /bin/echo d )"/e}
which is bad pattern: "$(. Fixing this properly would require doing a
full-fledged parse inside paramsubst(), rather than just a search for
unquoted backslashes.
Several of my attempts so far have caused at least Test/Y01 to fail in
comptesteval, so there is evidence for dependency on the existing
behavior.
> Something similar would have to be done with single quotes
I had an idea here that seems to work: while reading single-quoted
strings in gettokstr(), recognize when we're in a pattern and replace
all / with \/. That only fixes '...' and $'...' at best, though.
So, back to workers/51202, my original reply on this thread: handle
double-quotes in paramsubst(), leaving single quotes to gettokstr().
This still isn't perfect (see above regarding ${a/${b/c/d}/e}) but it
fixes the two Xfail tests in D04parameter and handles most of the
straightforward cases, including
a='/x/y/z' zsh -fc 'echo "${a/"/y/"/+}"'
and even the ${a/"${b/c/d}"/e} example, although it'll break on any
more double quotes inside other double quotes.
Other existing tests still pass. Let me know what you think.
diff --git a/Src/lex.c b/Src/lex.c
index 33b17cc95..31b130b07 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -938,7 +938,7 @@ gettokstr(int c, int sub)
{
int bct = 0, pct = 0, brct = 0, seen_brct = 0, fdpar = 0;
int intpos = 1, in_brace_param = 0, cmdsubst = 0;
- int inquote, unmatched = 0;
+ int inquote, unmatched = 0, in_pattern = 0;
enum lextok peek;
#ifdef DEBUG
int ocmdsp = cmdsp;
@@ -1160,7 +1160,7 @@ gettokstr(int c, int sub)
if (bct-- == in_brace_param) {
if (cmdsubst)
cmdpop();
- in_brace_param = cmdsubst = 0;
+ in_brace_param = cmdsubst = in_pattern = 0;
}
c = Outbrace;
break;
@@ -1309,7 +1309,8 @@ gettokstr(int c, int sub)
lexbuf.ptr--, lexbuf.len--;
else
break;
- }
+ } else if (in_pattern && c == '/')
+ add(Bnull);
add(c);
}
ALLOWHIST
@@ -1397,26 +1398,36 @@ gettokstr(int c, int sub)
*/
c = Dash;
break;
- case LX2_BANG:
- /*
- * Same logic as Dash, for ! to perform negation in range.
- */
- if (seen_brct)
- c = Bang;
- else
- c = '!';
- }
- add(c);
- c = hgetc();
- if (intpos)
+ case LX2_BANG:
+ /*
+ * Same logic as Dash, for ! to perform negation in range.
+ */
+ if (seen_brct)
+ c = Bang;
+ else
+ c = '!';
+ case LX2_OTHER:
+ if (in_brace_param) {
+ if (c == '/') {
+ if (in_pattern == 0)
+ in_pattern = 2;
+ else
+ --in_pattern;
+ }
+ }
+ }
+ add(c);
+ c = hgetc();
+ if (intpos)
intpos--;
- if (lexstop)
+ if (lexstop)
break;
- if (!cmdsubst && in_brace_param && act == LX2_STRING &&
- (c == '|' || c == Bar || inblank(c))) {
- cmdsubst = in_brace_param;
- cmdpush(CS_CURSH);
- }
+ if (!cmdsubst && in_brace_param && act == LX2_STRING &&
+ (c == '|' || c == Bar || inblank(c))) {
+ cmdsubst = in_brace_param;
+ cmdpush(CS_CURSH);
+ } else if (in_pattern == 2 && c != '/')
+ in_pattern = 1;
}
brk:
if (errflag) {
diff --git a/Src/subst.c b/Src/subst.c
index cdbfc138a..419069f5a 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -3088,6 +3088,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
chuck(ptr);
else
ptr++;
+ } else if (c == Dnull) {
+ chuck(ptr);
+ while (*ptr && *ptr != c)
+ ptr++;
+ if (*ptr == Dnull)
+ chuck(ptr);
+ ptr--; /* Outer loop is about to increment */
}
}
replstr = (*ptr && ptr[1]) ? ptr+1 : "";
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index c2008582c..69a4fd3ec 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -2762,13 +2762,13 @@ F:behavior, see http://austingroupbugs.net/view.php?id=888
slash='/'
print -r -- x${slash/'/'}y
--Df:(users/28784) substituting a single-quoted backslash, part #1: slash
+0:(users/28784) substituting a single-quoted backslash, part #1: slash
>xy
single_quote="'"
- print -r -- x${single_quote/'/'}y
--Df:(users/28784) substituting a single-quoted backslash, part #2: single quote
->x/y
+ print -r -- x${single_quote/$'/'}y
+0:(users/28784) substituting a single-quoted backslash, part #2: single quote
+>x'y
control="foobar"
print -r -- x${control/'bar'}y
Messages sorted by:
Reverse Date,
Date,
Thread,
Author