Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: Another ${(z)param} buglet



On Thu, 9 Dec 2010 18:16:32 +0000
Peter Stephenson <Peter.Stephenson@xxxxxxx> wrote:
> The best I can think of is extending the syntax to append options, a bit
> like (q) can have the q multiple or a - added, but maybe less gross
> e.g. explicit option flags like z+c+ to turn on comment handling (and
> possibly z+C+ to strip comments). '+' appears not to be taken so would
> work without complications and there's a mnemonic that there's more to
> come (compared with "(q-)").

Here it is implemented and tested, so you can tell me it's not good
enough.

Luckily, after ten years in the electronics industry I never get bitter
and am entirely used to constant failure.

Note I've made the new parsed history reading option handle comments as
the option least likely to screw up: you can always parse a comment, you
just read to the end of the line.

Index: Doc/Zsh/expn.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/expn.yo,v
retrieving revision 1.126
diff -p -u -r1.126 expn.yo
--- Doc/Zsh/expn.yo	6 Dec 2010 13:53:15 -0000	1.126
+++ Doc/Zsh/expn.yo	9 Dec 2010 20:13:19 -0000
@@ -1006,6 +1006,17 @@ errors are silently ignored.
 item(tt(z))(
 Split the result of the expansion into words using shell parsing to
 find the words, i.e. taking into account any quoting in the value.
+Comments are not treated specially but as ordinary strings, similar
+to interactive shells with the tt(INTERACTIVE_COMMENTS) option unset.
+
+The flag can take option letters between a following pair of
+`tt(PLUS())' characters.  tt(LPAR()z+PLUS()c+PLUS()RPAR()) causes
+comments to be parsed as a string and retained; any field in the
+resulting array beginning with an unquoted comment character is a
+comment.  tt(LPAR()z+PLUS()C+PLUS()RPAR()) causes comments to be parsed
+and removed.  The rule for comments is standard: anything between a word
+starting with the third character of tt($HISTCHARS), default tt(#), up to
+the next newline is a comment.
 
 Note that this is done very late, as for the `tt((s))' flag. So to
 access single words in the result, one has to use nested expansions as 
Index: Src/hist.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/hist.c,v
retrieving revision 1.106
diff -p -u -r1.106 hist.c
--- Src/hist.c	10 Oct 2010 17:51:29 -0000	1.106
+++ Src/hist.c	9 Dec 2010 20:13:20 -0000
@@ -2345,7 +2345,7 @@ readhistfile(char *fn, int err, int read
 		/*
 		 * Attempt to do this using the lexer.
 		 */
-		LinkList wordlist = bufferwords(NULL, pt, NULL);
+		LinkList wordlist = bufferwords(NULL, pt, NULL, 1);
 		LinkNode wordnode;
 		int nwords_max;
 		nwords_max = 2 * countlinknodes(wordlist);
@@ -2885,11 +2885,27 @@ histfileIsLocked(void)
  * which may not even be valid at this point.
  *
  * However, I'm so confused it could simply be baking Bakewell tarts.
+ *
+ * list may be an existing linked list (off the heap), in which case
+ * it will be appended to; otherwise it will be created.
+ *
+ * If buf is set we will take input from that string, else we will
+ * attempt to use ZLE directly in a way they tell you not to do on all
+ * programming courses.
+ *
+ * If index is non-NULL, and input is from a string in ZLE, *index
+ * is set to the position of the end of the current editor word.
+ *
+ * comments is used if buf is non-NULL (i.e. this is not a string
+ * from ZLE).
+ * If it is 0, comments are not parsed; they are treated as ordinary words.
+ * If it is 1, comments are treated as single strings, one per line.
+ * If it is 2, comments are removed.
  */
 
 /**/
 mod_export LinkList
-bufferwords(LinkList list, char *buf, int *index)
+bufferwords(LinkList list, char *buf, int *index, int comments)
 {
     int num = 0, cur = -1, got = 0, ne = noerrs;
     int owb = wb, owe = we, oadx = addedx, ozp = zleparse, onc = nocomments;
@@ -2906,7 +2922,6 @@ bufferwords(LinkList list, char *buf, in
      * string expression, we just turn the option off for this function.
      */
     opts[RCQUOTES] = 0;
-    zleparse = 1;
     addedx = 0;
     noerrs = 1;
     lexsave();
@@ -2928,11 +2943,18 @@ bufferwords(LinkList list, char *buf, in
 	inpush(p, 0, NULL);
 	zlemetall = strlen(p) ;
 	zlemetacs = zlemetall + 1;
-	nocomments = 1;
+
+	/*
+	 * If comments is non-zero we are handling comments.
+	 * zleparse indicates the mode to the lexer.
+	 */
+	zleparse = 1 + comments;
+	nocomments = !comments;
     } else {
 	int ll, cs;
 	char *linein;
 
+	zleparse = 1;
 	linein = zleentry(ZLE_CMD_GET_LINE, &ll, &cs);
 	zlemetall = ll + 1; /* length of line plus space added below */
 	zlemetacs = cs;
Index: Src/lex.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/lex.c,v
retrieving revision 1.57
diff -p -u -r1.57 lex.c
--- Src/lex.c	18 Nov 2010 10:07:56 -0000	1.57
+++ Src/lex.c	9 Dec 2010 20:13:21 -0000
@@ -116,7 +116,22 @@ mod_export int wb, we;
 /**/
 mod_export int noaliases;
 
-/* we are parsing a line sent to use by the editor */
+/*
+ * we are parsing a line sent to us by the editor, or some other string
+ * that's not part of standard command input (e.g. eval is part of
+ * normal command input).
+ *
+ * zleparse = 1 is the normal case.
+ * zleparse = 2 is used for word splitting; the difference is we
+ *              preserve comments.
+ * zleparse = 3 is also for word splitting, here handling comments
+ *              but stripping them.
+ *
+ * Note that although it is passed into the lexer as an input, the
+ * lexer can set it to zero after finding the word it's searching for.
+ * This only happens if the line being parsed actually does come from
+ * ZLE.
+ */
 
 /**/
 mod_export int zleparse;
@@ -743,26 +758,50 @@ gettok(void)
 
     /* chars in initial position in word */
 
+    /*
+     * Handle comments.  There are some special cases when this
+     * is not normal command input: zleparse implies we are examining
+     * a line lexically without it being used for normal command input.
+     * If zleparse is 1 we treat comments as normal for interactive
+     * mode.
+     * If zleparse is 2 (which has actually got nothing to do with zle)
+     * we always handle comments and retain them.
+     * If zleparse is 3 we always handle comments and discard them.
+     */
     if (c == hashchar && !nocomments &&
 	(isset(INTERACTIVECOMMENTS) ||
-	 (!zleparse && !expanding &&
+	 ((zleparse != 1) && !expanding &&
 	  (!interact || unset(SHINSTDIN) || strin)))) {
 	/* History is handled here to prevent extra  *
 	 * newlines being inserted into the history. */
 
+	if (zleparse == 2) {
+	    len = 0;
+	    bptr = tokstr = (char *)hcalloc(bsiz = 32);
+	    add(c);
+	}
 	while ((c = ingetc()) != '\n' && !lexstop) {
 	    hwaddc(c);
 	    addtoline(c);
+	    if (zleparse == 2)
+		add(c);
 	}
 
 	if (errflag)
 	    peek = LEXERR;
 	else {
-	    hwend();
-	    hwbegin(0);
-	    hwaddc('\n');
-	    addtoline('\n');
-	    peek = NEWLIN;
+	    if (zleparse == 2) {
+		*bptr = '\0';
+		if (!lexstop)
+		    hungetc(c);
+		peek = STRING;
+	    } else {
+		hwend();
+		hwbegin(0);
+		hwaddc('\n');
+		addtoline('\n');
+		peek = NEWLIN;
+	    }
 	}
 	return peek;
     }
Index: Src/subst.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/subst.c,v
retrieving revision 1.112
diff -p -u -r1.112 subst.c
--- Src/subst.c	25 Nov 2010 10:34:32 -0000	1.112
+++ Src/subst.c	9 Dec 2010 20:13:21 -0000
@@ -1556,6 +1556,10 @@ paramsubst(LinkList l, LinkNode n, char 
      * The (z) flag, nothing to do with SH_WORD_SPLIT which is tied
      * spbreak, see above; fairly straighforward in use but c.f.
      * the comment for mods.
+     *
+     * This ultimately becomes zleparse during lexical analysis, via
+     * the comments argument to bufferwords(). It's got nothing
+     * to do with zle.
      */
     int shsplit = 0;
     /*
@@ -1934,6 +1938,27 @@ paramsubst(LinkList l, LinkNode n, char 
 
 		case 'z':
 		    shsplit = 1;
+		    if (s[1] == '+') {
+			s += 2;
+			while (*s && *s != '+' && *s != ')' && *s != Outpar) {
+			    switch (*s++) {
+			    case 'c':
+				/* Parse and keep comments */
+				shsplit = 2;
+				break;
+
+			    case 'C':
+				/* Parse and remove comments */
+				shsplit = 3;
+				break;
+
+			    default:
+				goto flagerr;
+			    }
+			}
+			if (*s != '+')
+			    goto flagerr;
+		    }
 		    break;
 
 		case 'u':
@@ -3207,10 +3232,10 @@ paramsubst(LinkList l, LinkNode n, char 
 	if (isarr) {
 	    char **ap;
 	    for (ap = aval; *ap; ap++)
-		list = bufferwords(list, *ap, NULL);
+		list = bufferwords(list, *ap, NULL, shsplit-1);
 	    isarr = 0;
 	} else
-	    list = bufferwords(NULL, val, NULL);
+	    list = bufferwords(NULL, val, NULL, shsplit-1);
 
 	if (!list || !firstnode(list))
 	    val = dupstring("");
Index: Src/Modules/parameter.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Modules/parameter.c,v
retrieving revision 1.51
diff -p -u -r1.51 parameter.c
--- Src/Modules/parameter.c	11 Feb 2009 20:42:17 -0000	1.51
+++ Src/Modules/parameter.c	9 Dec 2010 20:13:21 -0000
@@ -1044,7 +1044,7 @@ histwgetfn(UNUSED(Param pm))
     int i = addhistnum(curhist, -1, HIST_FOREIGN), iw;
     Histent he = gethistent(i, GETHIST_UPWARD);
 
-    if ((ll = bufferwords(NULL, NULL, NULL)))
+    if ((ll = bufferwords(NULL, NULL, NULL, 0)))
         for (n = firstnode(ll); n; incnode(n))
             pushnode(l, getdata(n));
 
Index: Src/Zle/zle_hist.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_hist.c,v
retrieving revision 1.66
diff -p -u -r1.66 zle_hist.c
--- Src/Zle/zle_hist.c	6 Oct 2010 08:25:29 -0000	1.66
+++ Src/Zle/zle_hist.c	9 Dec 2010 20:13:22 -0000
@@ -677,7 +677,7 @@ insertlastword(char **args)
 	 * a deleted word, because that can only have come
 	 * from a non-empty line.  I think.
 	 */
-	if (!(l = bufferwords(NULL, NULL, NULL))) {
+	if (!(l = bufferwords(NULL, NULL, NULL, 0))) {
 	    unmetafy_line();
 	    return 1;
 	}
Index: Src/Zle/zle_misc.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_misc.c,v
retrieving revision 1.58
diff -p -u -r1.58 zle_misc.c
--- Src/Zle/zle_misc.c	24 Apr 2009 09:00:38 -0000	1.58
+++ Src/Zle/zle_misc.c	9 Dec 2010 20:13:22 -0000
@@ -843,7 +843,7 @@ copyprevshellword(UNUSED(char **args))
     if (zmult <= 0)
 	return 1;
 
-    if ((l = bufferwords(NULL, NULL, &i))) {
+    if ((l = bufferwords(NULL, NULL, &i, 0))) {
 	i -= (zmult-1);
 	if (i < 0)
 	    return 1;
Index: Test/D04parameter.ztst
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/D04parameter.ztst,v
retrieving revision 1.47
diff -p -u -r1.47 D04parameter.ztst
--- Test/D04parameter.ztst	25 Nov 2010 10:34:32 -0000	1.47
+++ Test/D04parameter.ztst	9 Dec 2010 20:13:22 -0000
@@ -417,6 +417,45 @@
 >5:i++ :
 >6:)):
 
+  line=$'A line with # someone\'s comment\nanother line # (1 more\nanother one'
+  print "*** Normal ***"
+  print -l ${(z)line}
+  print "*** Kept ***"
+  print -l ${(z+c+)line}
+  print "*** Removed ***"
+  print -l ${(z+C+)line}
+0:Comments with (z)
+>*** Normal ***
+>A
+>line
+>with
+>#
+>someone's comment
+>another line # (1 more
+>another one
+>*** Kept ***
+>A
+>line
+>with
+># someone's comment
+>;
+>another
+>line
+># (1 more
+>;
+>another
+>one
+>*** Removed ***
+>A
+>line
+>with
+>;
+>another
+>line
+>;
+>another
+>one
+
   psvar=(dog)
   setopt promptsubst
   foo='It shouldn'\''t $(happen) to a %1v.'
-- 
Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx>
Web page now at http://homepage.ntlworld.com/p.w.stephenson/



Messages sorted by: Reverse Date, Date, Thread, Author