Zsh Mailing List Archive
Messages sorted by:
Reverse Date,
Date,
Thread,
Author
Re: [bug] backslash stripped in sh/ksh emulation
- X-seq: zsh-workers 21863
- From: Peter Stephenson <pws@xxxxxxx>
- To: zsh-workers@xxxxxxxxxx
- Subject: Re: [bug] backslash stripped in sh/ksh emulation
- Date: Tue, 11 Oct 2005 12:43:28 +0100
- In-reply-to: <20051011123624.70fc9626.pws@xxxxxxx>
- Mailing-list: contact zsh-workers-help@xxxxxxxxxx; run by ezmlm
- Organization: Cambridge Silicon Radio
- References: <20051011083842.GA5380@sc> <20051011123624.70fc9626.pws@xxxxxxx>
Peter Stephenson <pws@xxxxxxx> wrote:
> So I've introduced a variant of Bnull, the ghost of a backslash, called
> Bnullkeep. This is only inserted in the code used for globsubst, isn't
> removed by remnulargs(), and is explicitly ignored by pattern matching. If
> the pattern match failed then untokenize() will restore the backslash to
> output the original string.
Er, and here's the actual code...
Note there is a minor fix to ztst.zsh, which was garbling backslashes in output
from diff because it used echo's backslash convention.
Index: Src/glob.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/glob.c,v
retrieving revision 1.46
diff -u -r1.46 glob.c
--- Src/glob.c 18 Aug 2005 10:17:52 -0000 1.46
+++ Src/glob.c 11 Oct 2005 11:12:35 -0000
@@ -2487,19 +2487,29 @@
mod_export void
tokenize(char *s)
{
- zshtokenize(s, 0);
+ zshtokenize(s, 0, 0);
}
+/*
+ * shtokenize is used when we tokenize a string with GLOB_SUBST set.
+ * In that case we need to retain backslashes when we turn the
+ * pattern back into a string, so that the string is not
+ * modified if it failed to match a pattern.
+ *
+ * It may be modified by the effect of SH_GLOB which turns off
+ * various zsh-specific options.
+ */
+
/**/
mod_export void
shtokenize(char *s)
{
- zshtokenize(s, isset(SHGLOB));
+ zshtokenize(s, 1, isset(SHGLOB));
}
/**/
static void
-zshtokenize(char *s, int shglob)
+zshtokenize(char *s, int glbsbst, int shglob)
{
char *t;
int bslash = 0;
@@ -2508,9 +2518,10 @@
cont:
switch (*s) {
case Bnull:
+ case Bnullkeep:
case '\\':
if (bslash) {
- s[-1] = Bnull;
+ s[-1] = glbsbst ? Bnullkeep : Bnull;
break;
}
bslash = 1;
@@ -2519,7 +2530,7 @@
if (shglob)
break;
if (bslash) {
- s[-1] = Bnull;
+ s[-1] = glbsbst ? Bnullkeep : Bnull;
break;
}
t = s;
@@ -2549,7 +2560,7 @@
for (t = ztokens; *t; t++)
if (*t == *s) {
if (bslash)
- s[-1] = Bnull;
+ s[-1] = glbsbst ? Bnullkeep : Bnull;
else
*s = (t - ztokens) + Pound;
break;
@@ -2569,12 +2580,23 @@
char *o = s, c;
while ((c = *s++))
- if (INULL(c)) {
+ if (c == Bnullkeep) {
+ /*
+ * An active backslash that needs to be turned back into
+ * a real backslash for output. However, we don't
+ * do that yet since we need to ignore it during
+ * pattern matching.
+ */
+ continue;
+ } else if (INULL(c)) {
char *t = s - 1;
- while ((c = *s++))
- if (!INULL(c))
+ while ((c = *s++)) {
+ if (c == Bnullkeep)
+ *t++ = '\\';
+ else if (!INULL(c))
*t++ = c;
+ }
*t = '\0';
if (!*o) {
o[0] = Nularg;
Index: Src/lex.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/lex.c,v
retrieving revision 1.30
diff -u -r1.30 lex.c
--- Src/lex.c 10 Aug 2005 10:56:41 -0000 1.30
+++ Src/lex.c 11 Oct 2005 11:12:35 -0000
@@ -33,7 +33,7 @@
/* tokens */
/**/
-mod_export char ztokens[] = "#$^*()$=|{}[]`<>?~`,'\"\\";
+mod_export char ztokens[] = "#$^*()$=|{}[]`<>?~`,'\"\\\\";
/* parts of the current token */
Index: Src/pattern.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/pattern.c,v
retrieving revision 1.28
diff -u -r1.28 pattern.c
--- Src/pattern.c 20 Sep 2005 15:10:27 -0000 1.28
+++ Src/pattern.c 11 Oct 2005 11:12:36 -0000
@@ -260,13 +260,13 @@
static char endstr[] = {
'/', /* file only */
- '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang,
+ '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang, Bnullkeep,
/* all patterns */
Tilde, Hat, Pound /* extended glob only */
};
-#define PATENDSTRLEN_NORM 9
-#define PATENDSTRLEN_EXT 12
+#define PATENDSTRLEN_NORM 10
+#define PATENDSTRLEN_EXT 13
/* Default size for pattern buffer */
@@ -1240,6 +1240,13 @@
*/
return 0;
break;
+ case Bnullkeep:
+ /*
+ * Marker for restoring a backslash in output:
+ * does not match a character.
+ */
+ return patcomppiece(flagp);
+ break;
#ifdef DEBUG
default:
dputs("BUG: character not handled in patcomppiece");
Index: Src/subst.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/subst.c,v
retrieving revision 1.40
diff -u -r1.40 subst.c
--- Src/subst.c 7 Dec 2004 16:55:03 -0000 1.40
+++ Src/subst.c 11 Oct 2005 11:12:36 -0000
@@ -1945,7 +1945,7 @@
*/
for (ptr = s; (c = *ptr) && c != '/'; ptr++)
{
- if ((c == Bnull || c == '\\') && ptr[1])
+ if ((c == Bnull || c == Bnullkeep || c == '\\') && ptr[1])
{
if (ptr[1] == '/')
chuck(ptr);
@@ -2846,11 +2846,11 @@
}
zsfree(hsubr);
for (tt = hsubl; *tt; tt++)
- if (INULL(*tt))
+ if (INULL(*tt) && *tt != Bnullkeep)
chuck(tt--);
untokenize(hsubl);
for (tt = hsubr = ztrdup(ptr2); *tt; tt++)
- if (INULL(*tt))
+ if (INULL(*tt) && *tt != Bnullkeep)
chuck(tt--);
ptr2[-1] = del;
if (sav)
Index: Src/zsh.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/zsh.h,v
retrieving revision 1.76
diff -u -r1.76 zsh.h
--- Src/zsh.h 8 Aug 2005 16:49:11 -0000 1.76
+++ Src/zsh.h 11 Oct 2005 11:12:36 -0000
@@ -120,7 +120,10 @@
#define DEFAULT_IFS " \t\n\203 "
-/* Character tokens */
+/*
+ * Character tokens.
+ * These should match the characters in ztokens, defined in lex.c
+ */
#define Pound ((char) 0x84)
#define String ((char) 0x85)
#define Hat ((char) 0x86)
@@ -141,15 +144,33 @@
#define Tilde ((char) 0x95)
#define Qtick ((char) 0x96)
#define Comma ((char) 0x97)
+/*
+ * Null arguments: placeholders for single and double quotes
+ * and backslashes.
+ */
#define Snull ((char) 0x98)
#define Dnull ((char) 0x99)
#define Bnull ((char) 0x9a)
-#define Nularg ((char) 0x9b)
+/*
+ * Backslash which will be returned to "\" instead of being stripped
+ * when we turn the string into a printable format.
+ */
+#define Bnullkeep ((char) 0x9b)
+/*
+ * Null argument that does not correspond to any character.
+ * This should be last as it does not appear in ztokens and
+ * is used to initialise the IMETA type in inittyptab().
+ */
+#define Nularg ((char) 0x9c)
-#define INULL(x) (((x) & 0xfc) == 0x98)
+#define INULL(x) (((x) & 0xf8) == 0x98)
+/*
+ * Take care to update the use of IMETA appropriately when adding
+ * tokens here.
+ */
/* Marker used in paramsubst for rc_expand_param */
-#define Marker ((char) 0x9c)
+#define Marker ((char) 0xa0)
/* chars that need to be quoted if meant literally */
Index: Test/D04parameter.ztst
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/D04parameter.ztst,v
retrieving revision 1.12
diff -u -r1.12 D04parameter.ztst
--- Test/D04parameter.ztst 22 Aug 2005 11:43:36 -0000 1.12
+++ Test/D04parameter.ztst 11 Oct 2005 11:12:36 -0000
@@ -196,6 +196,20 @@
>* boringfile evenmoreboringfile boringfile evenmoreboringfile
>boringfile evenmoreboringfile
+# The following tests a bug where globsubst didn't preserve
+# backslashes when printing out the original string.
+ str1='\\*\\'
+ (
+ setopt globsubst nonomatch
+ [[ \\\\ = $str1 ]] && print -r '\\ matched by' $str1
+ [[ \\foo\\ = $str1 ]] && print -r '\\foo matched by' $str1
+ [[ a\\b\\ = $str1 ]] || print -r 'a\\b not matched by' $str1
+ )
+0:globsubst with backslashes
+>\\ matched by \\*\\
+>\\foo matched by \\*\\
+>a\\b not matched by \\*\\
+
print -l "${$(print one word)}" "${=$(print two words)}"
0:splitting of $(...) inside ${...}
>one word
Index: Test/ztst.zsh
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/ztst.zsh,v
retrieving revision 1.22
diff -u -r1.22 ztst.zsh
--- Test/ztst.zsh 9 Aug 2005 06:51:40 -0000 1.22
+++ Test/ztst.zsh 11 Oct 2005 11:12:36 -0000
@@ -280,7 +280,7 @@
diff_out=$(diff "$@")
diff_ret="$?"
if [[ "$diff_ret" != "0" ]]; then
- echo "$diff_out"
+ print -r "$diff_out"
fi
return "$diff_ret"
--
Peter Stephenson <pws@xxxxxxx> Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK Tel: +44 (0)1223 692070
This message has been scanned for viruses by BlackSpider MailControl - www.blackspider.com
Messages sorted by:
Reverse Date,
Date,
Thread,
Author