Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: $' completion, the story so far



Here's my attempt so far to make $' quoting work a little better in
completion.

I've been working on this so long the protons in it are starting to
decay.  It's still not finished, but it's going in the right direction
and I think it's better than it used to be.

I've made the interface to $'...' handling a little more rational; it's
now also a bit less efficient but it's possible to call it without
feeling dirty.

If you want to see what's still wrong, try

su -c "ls $'<file>

and you'll get a whole load of unnecessary backslashes when a word is
inserted (for some reason listing alone works OK).

I haven't even begun to try to fix up the use of $' to quote characters
that aren't valid in the current locale.  I have a suspicion that could
take quite a lot more work.

Index: Src/exec.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/exec.c,v
retrieving revision 1.106
diff -u -r1.106 exec.c
--- Src/exec.c	19 Nov 2006 21:34:59 -0000	1.106
+++ Src/exec.c	3 Dec 2006 20:48:23 -0000
@@ -2905,7 +2905,7 @@
 	    qt = 1;
 	    break;
 	}
-    quotesubst(str);
+    str = quotesubst(str);
     untokenize(str);
     if (typ == REDIR_HEREDOCDASH) {
 	strip = 1;
Index: Src/subst.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/subst.c,v
retrieving revision 1.68
diff -u -r1.68 subst.c
--- Src/subst.c	11 Nov 2006 13:16:10 -0000	1.68
+++ Src/subst.c	3 Dec 2006 20:48:24 -0000
@@ -122,6 +122,42 @@
     unqueue_signals();
 }
 
+/*
+ * Perform $'...' quoting.  The arguments are
+ *   strstart   The start of the string
+ *   pstrdpos   Initially, *pstrdpos is the position where the $ of the $'
+ *              occurs.  It will be updated to the next character after the
+ *              last ' of the $'...'.
+ * The return value is the entire allocated string from strstart on the heap.
+ * Note the original string may be modified in the process.
+ */
+/**/
+static char *
+stringsubstquote(char *strstart, char **pstrdpos)
+{
+    int len;
+    char *strdpos = *pstrdpos, *strsub, *strret;
+
+    strsub = getkeystring(strdpos+2, &len,
+			  GETKEYS_DOLLARS_QUOTE, NULL);
+    len += 2;			/* measured from strdpos */
+
+    if (strstart != strdpos) {
+	*strdpos = '\0';
+	if (strdpos[len])
+	    strret = zhtricat(strstart, strsub, strdpos + len);
+	else
+	    strret = dyncat(strstart, strsub);
+    } else if (strdpos[len])
+	strret = dyncat(strsub, strdpos + len);
+    else
+	strret = strsub;
+
+    *pstrdpos = strret + (strdpos - strstart) + strlen(strsub);
+
+    return strret;
+}
+
 /**/
 static LinkNode
 stringsubst(LinkList list, LinkNode node, int ssub, int asssub)
@@ -150,7 +186,8 @@
 		setdata(node, (void *) str3);
 		continue;
 	    } else if (c == Snull) {
-		str = getkeystring(str, NULL, GETKEYS_DOLLARS_QUOTE, NULL);
+		str3 = stringsubstquote(str3, &str);
+		setdata(node, (void *) str3);
 		continue;
 	    } else {
 		node = paramsubst(list, node, &str, qt, ssub);
@@ -262,22 +299,25 @@
  * The remnulargs() makes this consistent with the other forms
  * of substitution, indicating that quotes have been fully
  * processed.
+ *
+ * The fully processed string is returned.
  */
 
 /**/
-void
+char *
 quotesubst(char *str)
 {
     char *s = str;
 
     while (*s) {
 	if (*s == String && s[1] == Snull) {
-	    s = getkeystring(s, NULL, GETKEYS_DOLLARS_QUOTE, NULL);
+	    str = stringsubstquote(str, &s);
 	} else {
 	    s++;
 	}
     }
     remnulargs(str);
+    return str;
 }
 
 /**/
Index: Src/utils.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/utils.c,v
retrieving revision 1.145
diff -u -r1.145 utils.c
--- Src/utils.c	8 Nov 2006 10:38:06 -0000	1.145
+++ Src/utils.c	3 Dec 2006 20:48:26 -0000
@@ -4563,10 +4563,14 @@
 
 /*
  * Decode a key string, turning it into the literal characters.
+ * The value returned is a newly allocated string from the heap.
  * The length is (usually) returned in *len.
  * how is a set of bits from the GETKEY_ values defined in zsh.h;
  * not all combinations of bits are useful.  Callers will typically
  * use one of the GETKEYS_ values which define sets of bits.
+ *
+ * The return value is unmetafied unless GETKEY_DOLLAR_QUOTE is
+ * in use.
  */
 
 /**/
@@ -4574,7 +4578,7 @@
 getkeystring(char *s, int *len, int how, int *misc)
 {
     char *buf, tmp[1];
-    char *t, *u = NULL;
+    char *t, *tdest = NULL, *u = NULL, *sstart = s;
     char svchar = '\0';
     int meta = 0, control = 0;
     int i;
@@ -4591,16 +4595,42 @@
 # endif
 #endif
 
+    DPUTS((how & GETKEY_UPDATE_OFFSET) &&
+	  (how & ~(GETKEY_DOLLAR_QUOTE|GETKEY_UPDATE_OFFSET)),
+	  "BUG: offset updating in getkeystring only supported with $'.");
+
     if (how & GETKEY_SINGLE_CHAR)
 	t = buf = tmp;
-    else if (!(how & GETKEY_DOLLAR_QUOTE))
+    else
 	t = buf = zhalloc(strlen(s) + 1);
-    else {
-	t = buf = s;
-	s += 2;
+    if (how & GETKEY_DOLLAR_QUOTE) {
+	/*
+	 * TODO: we're not necessarily guaranteed the output string will
+	 * be no longer than the input with \u and \U when output
+	 * characters need to be metafied: should check the maximum
+	 * length.
+	 *
+	 * We're going to unmetafy into the original string, but
+	 * to get a proper metafied input we're going to metafy
+	 * into an allocated buffer.  This is necessary if we have
+	 * \u and \U's with multiple metafied bytes.  We can't
+	 * simply remetafy the entire string because there may
+	 * be tokens (indeed, we know there are lexical nulls floating
+	 * around), so we have to be aware character by character
+	 * what we are converting.
+	 */
+	tdest = t;
+	t = s;
     }
     for (; *s; s++) {
+	char *torig = t;
 	if (*s == '\\' && s[1]) {
+	    int miscadded;
+	    if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc) {
+		(*misc)++;
+		miscadded = 1;
+	    } else
+		miscadded = 0;
 	    switch (*++s) {
 	    case 'a':
 #ifdef __STDC__
@@ -4630,6 +4660,8 @@
 	    case 'E':
 		if (!(how & GETKEY_EMACS)) {
 		    *t++ = '\\', s--;
+		    if (miscadded)
+			(*misc)--;
 		    continue;
 		}
 		/* FALL THROUGH */
@@ -4641,18 +4673,26 @@
 		    if (s[1] == '-')
 			s++;
 		    meta = 1 + control;	/* preserve the order of ^ and meta */
-		} else
+		} else {
+		    if (miscadded)
+			(*misc)--;
 		    *t++ = '\\', s--;
+		}
 		continue;
 	    case 'C':
 		if (how & GETKEY_EMACS) {
 		    if (s[1] == '-')
 			s++;
 		    control = 1;
-		} else
+		} else {
+		    if (miscadded)
+			(*misc)--;
 		    *t++ = '\\', s--;
+		}
 		continue;
 	    case Meta:
+		if (miscadded)
+		    (*misc)--;
 		*t++ = '\\', s--;
 		break;
 	    case '-':
@@ -4670,7 +4710,17 @@
 		}
 		goto def;
 	    case 'u':
+		if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc)
+		    (*misc) += 4;
 	    case 'U':
+		if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc) {
+		    (*misc) += 6;
+		    /*
+		     * We've now adjusted the offset for all the input
+		     * characters, so we need to subtract for each
+		     * byte of output below.
+		     */
+		}
 	    	wval = 0;
 		for (i=(*s == 'u' ? 4 : 8); i>0; i--) {
 		    if (*++s && idigit(*s))
@@ -4692,19 +4742,29 @@
 		if (count == -1) {
 		    zerr("character not in range");
 		    if (how & GETKEY_DOLLAR_QUOTE) {
-			for (u = t; (*u++ = *++s););
+			/* HERE new convention */
+			for (u = t; (*u++ = *++s);) {
+			    if ((how & GETKEY_UPDATE_OFFSET) &&
+				s - sstart > *misc)
+				(*misc)++;
+			}
 			return t;
 		    }
 		    *t = '\0';
 		    *len = t - buf;
 		    return buf;
 		}
+		if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc)
+		    (*misc) += count;
 		t += count;
 		continue;
 # else
 #  if defined(HAVE_NL_LANGINFO) && defined(CODESET)
 		if (!strcmp(nl_langinfo(CODESET), "UTF-8")) {
-		    t += ucs4toutf8(t, wval);
+		    count = ucs4toutf8(t, wval);
+		    t += count;
+		    if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc)
+			(*misc) += count;
 		    continue;
 		} else {
 #   ifdef HAVE_ICONV
@@ -4721,7 +4781,12 @@
 		    if (cd == (iconv_t)-1) {
 			zerr("cannot do charset conversion");
 			if (how & GETKEY_DOLLAR_QUOTE) {
-			    for (u = t; (*u++ = *++s););
+			    /* HERE: new convention */
+			    for (u = t; (*u++ = *++s);) {
+				if ((how & GETKEY_UPDATE_OFFSET) &&
+				    s - sstart > *misc)
+				    (*misc)++;
+			    }
 			    return t;
 			}
 			*t = '\0';
@@ -4736,6 +4801,8 @@
 			*len = t - buf;
 			return buf;
 		    }
+		    if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc)
+			(*misc) += count;
 		    continue;
 #   else
                     zerr("cannot do charset conversion");
@@ -4775,15 +4842,20 @@
 		    }
 		    s--;
 		} else {
-		    if (!(how & GETKEY_EMACS) && *s != '\\')
+		    if (!(how & GETKEY_EMACS) && *s != '\\') {
+			if (miscadded)
+			    (*misc)--;
 			*t++ = '\\';
+		    }
 		    *t++ = *s;
 		}
 		break;
 	    }
 	} else if ((how & GETKEY_DOLLAR_QUOTE) && *s == Snull) {
-	    for (u = t; (*u++ = *s++););
-	    return t + 1;
+	    /* return length to following character */
+	    *len = (s - sstart) + 1;
+	    *tdest = '\0';
+	    return buf;
 	} else if (*s == '^' && !control && (how & GETKEY_CTRL) && s[1]) {
 	    control = 1;
 	    continue;
@@ -4801,8 +4873,25 @@
 
 	} else if (*s == Meta)
 	    *t++ = *++s ^ 32;
-	else
+	else {
 	    *t++ = *s;
+	    if (itok(*s)) {
+		if (meta || control) {
+		    /*
+		     * Presumably we should be using meta or control
+		     * on the character representing the token.
+		     */
+		    *s = ztokens[*s - Pound];
+		} else if (how & GETKEY_DOLLAR_QUOTE) {
+		    /*
+		     * We don't want to metafy this, it's a real
+		     * token.
+		     */
+		    *tdest++ = *s;
+		    continue;
+		}
+	    }
+	}
 	if (meta == 2) {
 	    t[-1] |= 0x80;
 	    meta = 0;
@@ -4818,18 +4907,31 @@
 	    t[-1] |= 0x80;
 	    meta = 0;
 	}
-	if ((how & GETKEY_DOLLAR_QUOTE) && imeta(t[-1])) {
-	    *t = t[-1] ^ 32;
-	    t[-1] = Meta;
-	    t++;
+	if (how & GETKEY_DOLLAR_QUOTE) {
+	    char *t2;
+	    for (t2 = torig; t2 < t; t2++) {
+		if (imeta(*t2)) {
+		    *tdest++ = Meta;
+		    *tdest++ = *t2 ^ 32;
+		} else
+		    *tdest++ = *t2;
+	    }
 	}
 	if ((how & GETKEY_SINGLE_CHAR) && t != tmp) {
 	    *misc = STOUC(tmp[0]);
 	    return s + 1;
 	}
     }
-    DPUTS(how & GETKEY_DOLLAR_QUOTE, "BUG: unterminated $' substitution");
+    /*
+     * When called from completion, where we use GETKEY_UPDATE_OFFSET to
+     * update the index into the metafied editor line, we don't necessarily
+     * have the end of a $'...' quotation, else we should do.
+     */
+    DPUTS((how & (GETKEY_DOLLAR_QUOTE|GETKEY_UPDATE_OFFSET)) ==
+	  GETKEY_DOLLAR_QUOTE, "BUG: unterminated $' substitution");
     *t = '\0';
+    if (how & GETKEY_DOLLAR_QUOTE)
+	*tdest = '\0';
     if (how & GETKEY_SINGLE_CHAR)
       *misc = 0;
     else
Index: Src/zsh.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/zsh.h,v
retrieving revision 1.102
diff -u -r1.102 zsh.h
--- Src/zsh.h	1 Nov 2006 12:25:22 -0000	1.102
+++ Src/zsh.h	3 Dec 2006 20:48:27 -0000
@@ -1910,7 +1910,10 @@
 /****************/
 
 #ifdef DEBUG
-# define DPUTS(X,Y) if (!(X)) {;} else dputs(Y)
+#define STRINGIFY_LITERAL(x)	# x
+#define STRINGIFY(x)		STRINGIFY_LITERAL(x)
+#define ERRMSG(x)		(__FILE__ ":" STRINGIFY(__LINE__) ": " x)
+# define DPUTS(X,Y) if (!(X)) {;} else dputs(ERRMSG(Y))
 #else
 # define DPUTS(X,Y)
 #endif
@@ -1971,7 +1974,13 @@
     /* Handle \- (uses misc arg to getkeystring()) */
     GETKEY_BACKSLASH_MINUS = (1 << 5),
     /* Parse only one character (len arg to getkeystring() not used) */
-    GETKEY_SINGLE_CHAR = (1 << 6)
+    GETKEY_SINGLE_CHAR = (1 << 6),
+    /*
+     * If beyond offset in misc arg, add 1 to it for each character removed.
+     * Yes, I know that doesn't seem to make much sense.
+     * It's for use in completion, comprenez?
+     */
+    GETKEY_UPDATE_OFFSET = (1 << 7)
 };
 
 /*
Index: Src/Zle/compcore.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/compcore.c,v
retrieving revision 1.86
diff -u -r1.86 compcore.c
--- Src/Zle/compcore.c	5 Oct 2006 21:53:27 -0000	1.86
+++ Src/Zle/compcore.c	3 Dec 2006 20:48:28 -0000
@@ -1073,24 +1073,56 @@
 check_param(char *s, int set, int test)
 {
     char *p;
+    int found = 0;
 
     zsfree(parpre);
     parpre = NULL;
 
     if (!test)
 	ispar = parq = eparq = 0;
-    /* Try to find a `$'. */
-    for (p = s + offs; p > s && *p != String && *p != Qstring; p--);
-    if (*p == String || *p == Qstring) {
-	/* Handle $$'s */
+    /*
+     * Try to find a `$'.
+     *
+     * TODO: passing s as a parameter while we get some mysterious
+     * offset "offs" into it via a global sucks badly.
+     */ 
+    for (p = s + offs; ; p--) {
+	if (*p == String || *p == Qstring) {
+	    /*
+	     * String followed by Snull (unquoted) or
+	     * QString followed by ' (quoted) indicate a nested
+	     * $'...', not a substitution.
+	     *
+	     * TODO: the argument passing is obscure, no idea if
+	     * it's safe to test for the "'" at the end.
+	     */
+	    if (p < s + offs &&
+		!(*p == String && p[1] == Snull) &&
+		!(*p == Qstring && p[1] == '\'')) {
+		found = 1;
+		break;
+	    }
+	}
+	if (p == s)
+	    break;
+    }
+    if (found) {
+	/*
+	 * Handle $$'s
+	 *
+	 * TODO: this is already bad enough, so I haven't tried
+	 * testing for $'...' here.  If we parsed this forwards
+	 * it wouldn't be quite so bad.
+	 */
 	while (p > s && (p[-1] == String || p[-1] == Qstring))
 	    p--;
 	while ((p[1] == String || p[1] == Qstring) &&
 	       (p[2] == String || p[2] == Qstring))
 	    p += 2;
     }
-    if ((*p == String || *p == Qstring) && p[1] != Inpar && p[1] != Inbrack) {
-	/* This is really a parameter expression (not $(...) or $[...]). */
+    if (found &&
+	p[1] != Inpar && p[1] != Inbrack && p[1] != Snull) {
+	/* This is a parameter expression, not $(...), $[...], $'...'. */
 	char *b = p + 1, *e = b, *ie;
 	int n = 0, br = 1, nest = 0;
 
@@ -1268,6 +1300,17 @@
     return r;
 }
 
+
+/*
+ * This function reconstructs the full completion argument in
+ * heap memory by concatenating and, if untok is non-zero, untokenizing
+ * the ignored prefix and the active prefix and suffix.
+ * (It appears from the function that the ignored prefix won't
+ * be tokenized but I haven't checked this.)
+ * ipl and/or pl may be passed and if so will be set to the ignored
+ * prefix length and active prefix length respectively.
+ */
+
 /**/
 mod_export char *
 comp_str(int *ipl, int *pl, int untok)
@@ -1329,15 +1372,94 @@
 int
 set_comp_sep(void)
 {
+    /*
+     * s: full (reconstructed) completion argument
+     * lip: ignored prefix length
+     * lp: active prefix length
+     * 1: the number "one" => untokenize
+     */
     int lip, lp;
     char *s = comp_str(&lip, &lp, 1);
     LinkList foo = newlinklist();
     LinkNode n;
-    int owe = we, owb = wb, ocs, swb, swe, scs, soffs, ne = noerrs;
-    int tl, got = 0, i = 0, j, cur = -1, oll, sl, css = 0;
-    int remq = 0, dq = 0, odq, sq = 0, osq, issq = 0, sqq = 0, lsq = 0, qa = 0;
+    /* Save word position */
+    int owe = we, owb = wb;
+    /* Save cursor position and line length */
+    int ocs, oll;
+    /*
+     * Values of word beginning and end and cursor after subtractions
+     * due to separators.   I think these are indexes into zlemetaline,
+     * but with some subtractions; they don't seem to be indexes into
+     * s, which is the current argument before quote stripping.
+     */
+    int swb, swe, scs;
+    /* Offset into current word after subtractions. */
+    int soffs;
+    /* Current state of error suppression. */
+    int ne = noerrs;
+    /* Length of tmp string */
+    int tl;
+    /* flag that we've got the current completion word, perhaps? */
+    int got = 0;
+    /*
+     * i starts off as the number of the completion word we're looking at,
+     * which is why it's initialised, but is then recycled as a
+     * loop variable.  j is always a loop variable.
+     */
+    int i = 0, j;
+    /*
+     * cur: completion word currently being completed (0 offset).
+     * sl: length of string s, the string we're manipulating.
+     * css: modification of offset into current word beyond cursor
+     * position due to the effects of backslashing, counted during our first
+     * examination of compqstack for double quotes and dollar quotes.
+     * However, for some reason, when the current quoting scheme is
+     * backslashing we modify swb directly later rather than counting it at
+     * the point we remove the backquotes.
+     */
+    int cur = -1, sl, css = 0;
+    /*
+     * Flag that we're doing the thing with backslashes mentioned
+     * for css.
+     */
+    int remq = 0;
+    /*
+     * dq: backslash-removals for double quotes
+     * odq: value of dq before modification for active (Bnull'ed)
+     *      backslashes, or something.
+     * sq: quote-removals for single quotes; either RCQUOTES or '\'' which
+     *     are specially handled (but currently only if RCQUOTES is not
+     *     set, which isn't necessarily correct if the quotes were typed by
+     *     the user).
+     * osq: c.f. odq, taking account of Snull's and embedded "'"'s.
+     * issq: flag that current quoting is single quotes; I assume that
+     *       civilization would end if we used a consistent way of
+     *       flagging the different types of quotes, or something.
+     * lsq: when quoting is single quotes (issq), counts the offset
+     *      adjustment needed in the word being examined in the lexer loop.
+     * sqq: the value of lsq for the current completion word.
+     * qa:  not, unfortunately, a question and answer session with the
+     *      original author, but the number of characters being removed
+     *      when stripping single quotes: 1 for RCQUOTES, 3 otherwise
+     *      (because we leave a "'" in the final string).
+     */
+    int dq = 0, odq, sq = 0, osq, issq = 0, sqq = 0, lsq = 0, qa = 0;
+    /* dolq: like sq and dq but for dollars quoting. */
+    int dolq = 0;
+    /* remember some global variable values (except lp is local) */
     int ois = instring, oib = inbackt, noffs = lp, ona = noaliases;
-    char *tmp, *p, *ns, *ol, sav, *qp, *qs, *ts;
+    /*
+     * tmp: used for temporary processing of strings
+     * p: loop pointer for tmp etc.
+     * ns: holds yet another version of the current completion string,
+     *     goodness knows how it differs from s, tmp, ts, ...
+     * ts: untokenized ns
+     * ol: saves old metafied editing line
+     * sav: save character when NULLed; careful, there's a nested
+     *      definition of sav just to keep you on your toes
+     * qp, qs: prefix and suffix strings deduced from s.
+     */
+    char *tmp, *p, *ns, *ts, *ol, sav, *qp, *qs;
 
     METACHECK();
 
@@ -1366,7 +1488,7 @@
     switch (*compqstack) {
     case QT_NONE:
 #ifdef DEBUG
-	dputs("BUG: head of compstack is NULL");
+	dputs("BUG: head of compqstack is NULL");
 #endif
 	break;
 
@@ -1386,10 +1508,20 @@
 
     case QT_DOUBLE:
         for (j = 0, p = tmp; *p; p++, j++)
-            if (*p == '\\' && p[1] == '\\') {
-                dq++;
+	    /*
+	     * I added the handling for " here: before it just handled
+	     * backslashes.  This meant that a \" inside a " wasn't
+	     * handled properly.  I presume that was an oversight.
+	     * I don't know if this is the right place to fix this
+	     * particular problem because I'm utterly confused by
+	     * the structure of the code in this function.
+	     */
+            if (*p == '\\' && (p[1] == '\\' || p[1] == '"')) {
+		dq++;
                 chuck(p);
-                if (j > zlemetacs) {
+		if (*p == '"')
+		    zlemetacs--;
+		else if (j > zlemetacs) {
                     zlemetacs++;
                     css++;
                 }
@@ -1399,7 +1531,15 @@
 	break;
 
     case QT_DOLLARS:
-	/* TODO */
+	sl = strlen(tmp);
+	j = zlemetacs;
+	tmp = getkeystring(tmp, &tl,
+			   GETKEY_DOLLAR_QUOTE|GETKEY_UPDATE_OFFSET,
+			   &zlemetacs);
+	/* The number of characters we removed because of $' quoting */
+	dolq = sl - tl;
+	/* Offset into the word is modified, too... */
+	css += zlemetacs - j;
 	break;
     }
     odq = dq;
@@ -1416,6 +1556,12 @@
 
 	    if (!tokstr)
 		break;
+	    /*
+	     * If there was an error, it may be because we're in
+	     * an unterminated string.  Count the active quote
+	     * characters to see.  We need an odd number.
+	     * This works for $', too, since the ' there is an Snull.
+	     */
 	    for (j = 0, p = tokstr; *p; p++) {
 		if (*p == Snull || *p == Dnull)
 		    j++;
@@ -1456,8 +1602,8 @@
 	    DPUTS(!p, "no current word in substr");
 	    got = 1;
 	    cur = i;
-	    swb = wb - 1 - dq - sq;
-	    swe = we - 1 - dq - sq;
+	    swb = wb - 1 - dq - sq - dolq;
+	    swe = we - 1 - dq - sq - dolq;
             sqq = lsq;
 	    soffs = zlemetacs - swb - css;
 	    chuck(p + soffs);
@@ -1524,6 +1670,18 @@
 	zsfree(autoq);
 	autoq = NULL;
     }
+
+    /*
+     * In the following loop we look for parse quotes yet again.
+     * I don't really have the faintest idea why, but given that
+     * ns is immediately reassigned from ts afterwards (why? what's
+     * wrong with it being in ts?) and scs isn't used again, I
+     * presume it's in aid of getting the indexes for word beginning
+     * (swb) and start offset (soffs) into s correct.
+     *
+     * I think soffs is an index into s, while swb and scs are indexes
+     * into the full line but with some jiggery pokery for quote removal.
+     */
     for (p = ns, i = swb; *p; p++, i++) {
 	if (inull(*p)) {
 	    if (i < scs) {
@@ -1560,7 +1718,22 @@
 	if (ql > rl)
 	    swb -= ql - rl;
     }
-    sav = s[(i = swb - 1 - sqq)];
+    /*
+     * Using the word beginning and end as an index into the reconstructed
+     * string s, swb and swe, we can get the strings before and after
+     * the word we're considering.
+     *
+     * Because it would be too easy otherwise, there are random
+     * additional subtractions to be made.  The 1 might be something
+     * to do with the space that appeared mysteriously at the start of the
+     * line when we passed it through the lexer.  The sqq is to do with
+     * the single quote quoting when we passed it through the lexer.
+     *
+     * TODO: I added the "+ dq" because it seemed to improve matters for
+     * double quoting but the fact it's arrived at in a rather different way
+     * from sqq may indicate this is wrong.  $'...' may need something, too.
+     */
+    sav = s[(i = swb - 1 - sqq + dq)];
     s[i] = '\0';
     qp = (issq ? dupstring(s) : rembslash(s));
     s[i] = sav;
@@ -1583,12 +1756,12 @@
     }
     {
 	int set = CP_QUOTE | CP_QUOTING, unset = 0;
-	char compnewchars[2];
 
-	compnewchars[0] =
-	    (char)(instring == QT_NONE ? QT_BACKSLASH : instring);
-	compnewchars[1] = '\0';
-	p = tricat(compnewchars, compqstack, "");
+	tl = strlen(compqstack);
+	p = zalloc(tl + 2);
+	*p = (char)(instring == QT_NONE ? QT_BACKSLASH : instring);
+	memcpy(p+1, compqstack, tl);
+	p[tl+1] = '\0';
 	zsfree(compqstack);
 	compqstack = p;
 
@@ -1898,7 +2071,7 @@
 		break;
 	    }
 	    inbackt = 0;
-	    autoq = multiquote(compquote, 1);
+	    autoq = multiquote(*compquote == '$' ? compquote+1 : compquote, 1);
 	}
     } else {
 	instring = QT_NONE;
Index: Src/Zle/compctl.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/compctl.c,v
retrieving revision 1.30
diff -u -r1.30 compctl.c
--- Src/Zle/compctl.c	16 Oct 2006 17:02:06 -0000	1.30
+++ Src/Zle/compctl.c	3 Dec 2006 20:48:29 -0000
@@ -2309,7 +2309,7 @@
 		    break;
 		}
 		inbackt = 0;
-		strcpy(buf, compquote);
+		strcpy(buf, *compquote == '$' ? compquote+1 : compquote);
 		autoq = buf;
 	    }
 	} else {
Index: Src/Zle/zle.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle.h,v
retrieving revision 1.33
diff -u -r1.33 zle.h
--- Src/Zle/zle.h	3 Aug 2006 15:37:50 -0000	1.33
+++ Src/Zle/zle.h	3 Dec 2006 20:48:29 -0000
@@ -324,13 +324,10 @@
 };
 
 #ifdef DEBUG
-#define STRINGIFY_LITERAL(x)	# x
-#define STRINGIFY(x)		STRINGIFY_LITERAL(x)
-#define ERRMSG(x)		(__FILE__ ":" STRINGIFY(__LINE__) ": " x)
 #define METACHECK()		\
-	DPUTS(zlemetaline == NULL, ERRMSG("line not metafied"))
+	DPUTS(zlemetaline == NULL, "line not metafied")
 #define UNMETACHECK()		\
-	DPUTS(zlemetaline != NULL, ERRMSG("line metafied"))
+	DPUTS(zlemetaline != NULL, "line metafied")
 #else
 #define METACHECK()
 #define UNMETACHECK()
Index: Src/Zle/zle_tricky.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_tricky.c,v
retrieving revision 1.79
diff -u -r1.79 zle_tricky.c
--- Src/Zle/zle_tricky.c	2 Nov 2006 16:26:24 -0000	1.79
+++ Src/Zle/zle_tricky.c	3 Dec 2006 20:48:30 -0000
@@ -522,8 +522,12 @@
 	       (p[2] == String || p[2] == Qstring))
 	    p += 2;
     }
-    if ((*p == String || *p == Qstring) && p[1] != Inpar && p[1] != Inbrack) {
-	/* This is really a parameter expression (not $(...) or $[...]). */
+    if ((*p == String || *p == Qstring) &&
+	p[1] != Inpar && p[1] != Inbrack && p[1] != '\'') {
+	/*
+	 * This is really a parameter expression (not $(...) or $[...]
+	 * or $'...').
+	 */
 	char *b = p + 1, *e = b;
 	int n = 0, br = 1, nest = 0;
 
@@ -1560,6 +1564,12 @@
 	n = tricat(qipre, q, "");
 	zsfree(qipre);
 	qipre = n;
+	/*
+	 * TODO: it's certainly the case that the suffix for
+	 * $' is ', but exactly what does that affect?
+	 */
+	if (*q == '$')
+	    q++;
 	if (sl > 1 && qtptr[sl - 1] == *qtptr) {
 	    n = tricat(q, qisuf, "");
 	    zsfree(qisuf);
@@ -1578,10 +1588,17 @@
         }
     }
     /* While building the quoted form, we also clean up the command line. */
-    for (p = s, i = wb, j = 0; *p; p++, i++)
-	if (inull(*p)) {
+    for (p = s, i = wb, j = 0; *p; p++, i++) {
+	int skipchars;
+	if ((*p == String || *p == Qstring) && p[1] == Snull)
+	    skipchars = 2;
+	else if (inull(*p))
+	    skipchars = 1;
+	else
+	    skipchars = 0;
+	if (skipchars) {
 	    if (i < zlemetacs)
-		offs--;
+		offs -= skipchars;
 	    if (*p == Snull && isset(RCQUOTES))
 		j = 1-j;
 	    if (p[1] || *p != Bnull) {
@@ -1591,24 +1608,29 @@
 		} else {
 		    ocs = zlemetacs;
 		    zlemetacs = i;
-		    foredel(1);
-		    if ((zlemetacs = ocs) > i--)
-			zlemetacs--;
-		    we--;
+		    foredel(skipchars);
+		    if ((zlemetacs = ocs) > (i -= skipchars))
+			zlemetacs -= skipchars;
+		    we -= skipchars;
 		}
 	    } else {
 		ocs = zlemetacs;
 		zlemetacs = we;
-		backdel(1);
+		backdel(skipchars);
 		if (ocs == we)
-		    zlemetacs = we - 1;
+		    zlemetacs = we - skipchars;
 		else
 		    zlemetacs = ocs;
-		we--;
+		we -= skipchars;
 	    }
-	    chuck(p--);
+	    /* we need to get rid of all the quotation bits... */
+	    while (skipchars--)
+		chuck(p);
+	    /* but we only decrement once to confuse the loop increment. */
+	    p--;
 	} else if (j && *p == '\'' && i < zlemetacs)
 	    offs--;
+    }
 
     zsfree(origword);
     origword = ztrdup(s);
@@ -1639,7 +1661,7 @@
 		    i += tp - p;
 		    dp += tp - p;
 		    p = tp;
-		} else {
+		} else if (p[1] != Snull /* paranoia: should be gone now */) {
 		    char *tp = p + 1;
 
 		    for (; *tp == '^' || *tp == Hat ||

-- 
Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx>
Web page now at http://homepage.ntlworld.com/p.w.stephenson/



Messages sorted by: Reverse Date, Date, Thread, Author