Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: [bug] backslash stripped in sh/ksh emulation



Peter Stephenson <pws@xxxxxxx> wrote:
> So I've introduced a variant of Bnull, the ghost of a backslash, called
> Bnullkeep.  This is only inserted in the code used for globsubst, isn't
> removed by remnulargs(), and is explicitly ignored by pattern matching.  If
> the pattern match failed then untokenize() will restore the backslash to
> output the original string.

Er, and here's the actual code...

Note there is also a minor fix to ztst.zsh, which was garbling backslashes
in output from diff because it used echo's backslash convention.

Index: Src/glob.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/glob.c,v
retrieving revision 1.46
diff -u -r1.46 glob.c
--- Src/glob.c	18 Aug 2005 10:17:52 -0000	1.46
+++ Src/glob.c	11 Oct 2005 11:12:35 -0000
@@ -2487,19 +2487,29 @@
 mod_export void
 tokenize(char *s)
 {
-    zshtokenize(s, 0);
+    zshtokenize(s, 0, 0);
 }
 
+/*
+ * shtokenize is used when we tokenize a string with GLOB_SUBST set.
+ * In that case we need to retain backslashes when we turn the
+ * pattern back into a string, so that the string is not
+ * modified if it failed to match a pattern.
+ *
+ * It may be modified by the effect of SH_GLOB which turns off
+ * various zsh-specific options.
+ */
+
 /**/
 mod_export void
 shtokenize(char *s)
 {
-    zshtokenize(s, isset(SHGLOB));
+    zshtokenize(s, 1, isset(SHGLOB));
 }
 
 /**/
 static void
-zshtokenize(char *s, int shglob)
+zshtokenize(char *s, int glbsbst, int shglob)
 {
     char *t;
     int bslash = 0;
@@ -2508,9 +2518,10 @@
       cont:
 	switch (*s) {
 	case Bnull:
+	case Bnullkeep:
 	case '\\':
 	    if (bslash) {
-		s[-1] = Bnull;
+		s[-1] = glbsbst ? Bnullkeep : Bnull;
 		break;
 	    }
 	    bslash = 1;
@@ -2519,7 +2530,7 @@
 	    if (shglob)
 		break;
 	    if (bslash) {
-		s[-1] = Bnull;
+		s[-1] = glbsbst ? Bnullkeep : Bnull;
 		break;
 	    }
 	    t = s;
@@ -2549,7 +2560,7 @@
 	    for (t = ztokens; *t; t++)
 		if (*t == *s) {
 		    if (bslash)
-			s[-1] = Bnull;
+			s[-1] = glbsbst ? Bnullkeep : Bnull;
 		    else
 			*s = (t - ztokens) + Pound;
 		    break;
@@ -2569,12 +2580,23 @@
 	char *o = s, c;
 
 	while ((c = *s++))
-	    if (INULL(c)) {
+	    if (c == Bnullkeep) {
+		/*
+		 * An active backslash that needs to be turned back into
+		 * a real backslash for output.  However, we don't
+		 * do that yet since we need to ignore it during
+		 * pattern matching.
+		 */
+		continue;
+	    } else if (INULL(c)) {
 		char *t = s - 1;
 
-		while ((c = *s++))
-		    if (!INULL(c))
+		while ((c = *s++)) {
+		    if (c == Bnullkeep)
+			*t++ = '\\';
+		    else if (!INULL(c))
 			*t++ = c;
+		}
 		*t = '\0';
 		if (!*o) {
 		    o[0] = Nularg;
Index: Src/lex.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/lex.c,v
retrieving revision 1.30
diff -u -r1.30 lex.c
--- Src/lex.c	10 Aug 2005 10:56:41 -0000	1.30
+++ Src/lex.c	11 Oct 2005 11:12:35 -0000
@@ -33,7 +33,7 @@
 /* tokens */
 
 /**/
-mod_export char ztokens[] = "#$^*()$=|{}[]`<>?~`,'\"\\";
+mod_export char ztokens[] = "#$^*()$=|{}[]`<>?~`,'\"\\\\";
 
 /* parts of the current token */
 
Index: Src/pattern.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/pattern.c,v
retrieving revision 1.28
diff -u -r1.28 pattern.c
--- Src/pattern.c	20 Sep 2005 15:10:27 -0000	1.28
+++ Src/pattern.c	11 Oct 2005 11:12:36 -0000
@@ -260,13 +260,13 @@
 
 static char endstr[] = {
     '/',			/* file only */
-    '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang,
+    '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang, Bnullkeep,
 				/* all patterns */
     Tilde, Hat, Pound		/* extended glob only */
 };
 
-#define PATENDSTRLEN_NORM 9
-#define PATENDSTRLEN_EXT  12
+#define PATENDSTRLEN_NORM 10
+#define PATENDSTRLEN_EXT  13
 
 
 /* Default size for pattern buffer */
@@ -1240,6 +1240,13 @@
 	     */
 	    return 0;
 	    break;
+	case Bnullkeep:
+	    /*
+	     * Marker for restoring a backslash in output:
+	     * does not match a character.
+	     */
+	    return patcomppiece(flagp);
+	    break;
 #ifdef DEBUG
 	default:
 	    dputs("BUG: character not handled in patcomppiece");
Index: Src/subst.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/subst.c,v
retrieving revision 1.40
diff -u -r1.40 subst.c
--- Src/subst.c	7 Dec 2004 16:55:03 -0000	1.40
+++ Src/subst.c	11 Oct 2005 11:12:36 -0000
@@ -1945,7 +1945,7 @@
 	     */
 	    for (ptr = s; (c = *ptr) && c != '/'; ptr++)
 	    {
-		if ((c == Bnull || c == '\\') && ptr[1])
+		if ((c == Bnull || c == Bnullkeep || c == '\\') && ptr[1])
 		{
 		    if (ptr[1] == '/')
 			chuck(ptr);
@@ -2846,11 +2846,11 @@
 		}
 		zsfree(hsubr);
 		for (tt = hsubl; *tt; tt++)
-		    if (INULL(*tt))
+		    if (INULL(*tt) && *tt != Bnullkeep)
 			chuck(tt--);
 		untokenize(hsubl);
 		for (tt = hsubr = ztrdup(ptr2); *tt; tt++)
-		    if (INULL(*tt))
+		    if (INULL(*tt) && *tt != Bnullkeep)
 			chuck(tt--);
 		ptr2[-1] = del;
 		if (sav)
Index: Src/zsh.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/zsh.h,v
retrieving revision 1.76
diff -u -r1.76 zsh.h
--- Src/zsh.h	8 Aug 2005 16:49:11 -0000	1.76
+++ Src/zsh.h	11 Oct 2005 11:12:36 -0000
@@ -120,7 +120,10 @@
 
 #define DEFAULT_IFS	" \t\n\203 "
 
-/* Character tokens */
+/*
+ * Character tokens.
+ * These should match the characters in ztokens, defined in lex.c
+ */
 #define Pound		((char) 0x84)
 #define String		((char) 0x85)
 #define Hat		((char) 0x86)
@@ -141,15 +144,33 @@
 #define Tilde		((char) 0x95)
 #define Qtick		((char) 0x96)
 #define Comma		((char) 0x97)
+/*
+ * Null arguments: placeholders for single and double quotes
+ * and backslashes.
+ */
 #define Snull		((char) 0x98)
 #define Dnull		((char) 0x99)
 #define Bnull		((char) 0x9a)
-#define Nularg		((char) 0x9b)
+/*
+ * Backslash which will be returned to "\" instead of being stripped
+ * when we turn the string into a printable format.
+ */
+#define Bnullkeep       ((char) 0x9b)
+/*
+ * Null argument that does not correspond to any character.
+ * This should be last as it does not appear in ztokens and
+ * is used to initialise the IMETA type in inittyptab().
+ */
+#define Nularg		((char) 0x9c)
 
-#define INULL(x)	(((x) & 0xfc) == 0x98)
+#define INULL(x)	(((x) & 0xf8) == 0x98)
 
+/*
+ * Take care to update the use of IMETA appropriately when adding
+ * tokens here.
+ */
 /* Marker used in paramsubst for rc_expand_param */
-#define Marker		((char) 0x9c)
+#define Marker		((char) 0xa0)
 
 /* chars that need to be quoted if meant literally */
 
Index: Test/D04parameter.ztst
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/D04parameter.ztst,v
retrieving revision 1.12
diff -u -r1.12 D04parameter.ztst
--- Test/D04parameter.ztst	22 Aug 2005 11:43:36 -0000	1.12
+++ Test/D04parameter.ztst	11 Oct 2005 11:12:36 -0000
@@ -196,6 +196,20 @@
 >* boringfile evenmoreboringfile boringfile evenmoreboringfile
 >boringfile evenmoreboringfile
 
+# The following tests a bug where globsubst didn't preserve
+# backslashes when printing out the original string.
+  str1='\\*\\'
+  (
+  setopt globsubst nonomatch
+  [[ \\\\ = $str1 ]] && print -r '\\ matched by' $str1
+  [[ \\foo\\ = $str1 ]] && print -r '\\foo matched by' $str1
+  [[ a\\b\\ = $str1 ]] || print -r 'a\\b not matched by' $str1
+  )
+0:globsubst with backslashes
+>\\ matched by \\*\\
+>\\foo matched by \\*\\
+>a\\b not matched by \\*\\
+
   print -l "${$(print one word)}" "${=$(print two words)}"
 0:splitting of $(...) inside ${...}
 >one word
Index: Test/ztst.zsh
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/ztst.zsh,v
retrieving revision 1.22
diff -u -r1.22 ztst.zsh
--- Test/ztst.zsh	9 Aug 2005 06:51:40 -0000	1.22
+++ Test/ztst.zsh	11 Oct 2005 11:12:36 -0000
@@ -280,7 +280,7 @@
   diff_out=$(diff "$@")
   diff_ret="$?"
   if [[ "$diff_ret" != "0" ]]; then
-    echo "$diff_out"
+    print -r "$diff_out"
   fi
 
   return "$diff_ret"


-- 
Peter Stephenson <pws@xxxxxxx>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


This message has been scanned for viruses by BlackSpider MailControl - www.blackspider.com



Messages sorted by: Reverse Date, Date, Thread, Author