Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: parameter substitution for exclusion by array



I was completing after "autoload" the other day and got an error message
about a pattern.  I didn't notice in detail what the problem in the
pattern was, but the error came from this:

      # Filter out functions already loaded or marked for autoload.
      args=(${args:#(${(kj.|.)~functions})})

Function names can actually include pattern characters; it's not
usually a good idea, but it's possible to produce them by mistake, so
they would need quoting here.  Quoting patterns is a bit icky, since you
can't quote characters that aren't special to the pattern (even if they
are otherwise special to the shell), so using (q) doesn't quite do what
you want.  What's more, although the expression above is about the best
we can do if you want to use a pattern, it's very inefficient for
removing elements from an array, effectively performing a quadratic
search since we don't try to optimise patterns.

So I thought about an operation that would remove elements of an array
using another array and, serendipitously, the new implementation of
uniqarray(), or rather the hash table that goes with it, is exactly
what's needed to do this efficiently, since it's the same problem but
with two arrays instead of one.

I thought of using ${...:~...} for this, but unfortunately ~ can
introduce an arithmetic expression, so this introduces an ambiguity with
index offset notation.  So I picked ${...:|...}, which has a double
mnemonic: bar the "or" of the array and currently always produces an
invalid expression.

Then I realised it was a trivial modification to get the intersection of
two arrays instead.  For that, ${...:&...} might be nice, but :& can be
a history modifier.  So I picked ${...:*...} as being the nearest thing
that wasn't used.

It might be nice to allow ${...:&(k)...} to use the keys of an
associative array, but that can wait.

Index: Completion/Zsh/Command/_typeset
===================================================================
RCS file: /cvsroot/zsh/zsh/Completion/Zsh/Command/_typeset,v
retrieving revision 1.4
diff -p -u -r1.4 _typeset
--- Completion/Zsh/Command/_typeset	6 Nov 2006 17:15:00 -0000	1.4
+++ Completion/Zsh/Command/_typeset	20 Apr 2012 19:15:06 -0000
@@ -77,7 +77,9 @@ if [[ "$state" = vars_eq ]]; then
     elif [[ $service = autoload || -n $opt_args[(i)-[uU]] ]]; then
       args=(${^fpath}/*(:t))
       # Filter out functions already loaded or marked for autoload.
-      args=(${args:#(${(kj.|.)~functions})})
+      local -a funckeys
+      funckeys=(${(k)functions})
+      args=${args:|funckeys}
       _wanted functions expl 'shell function' compadd -a args
     else
       _functions
Index: Doc/Zsh/expn.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/expn.yo,v
retrieving revision 1.141
diff -p -u -r1.141 expn.yo
--- Doc/Zsh/expn.yo	11 Dec 2011 17:22:59 -0000	1.141
+++ Doc/Zsh/expn.yo	20 Apr 2012 19:15:07 -0000
@@ -604,6 +604,19 @@ If var(name) is an array
 the matching array elements are removed (use the `tt((M))' flag to
 remove the non-matched elements).
 )
+item(tt(${)var(name)tt(:|)var(arrayname)tt(}))(
+If var(arrayname) is the name (N.B., not contents) of an array
+variable, then any elements contained in var(arrayname) are removed
+from the substitution of var(name).  If the substitution is scalar,
+either because var(name) is a scalar variable or the expression is
+quoted, the elements of var(arrayname) are instead tested against the
+entire expression.
+)
+item(tt(${)var(name)tt(:*)var(arrayname)tt(}))(
+Similar to the preceding substitution, but in the opposite sense,
+so that entries present in both the original substitution and as
+elements of var(arrayname) are retained and others removed.
+)
 xitem(tt(${)var(name)tt(:)var(offset)tt(}))
 item(tt(${)var(name)tt(:)var(offset)tt(:)var(length)tt(}))(
 This syntax gives effects similar to parameter subscripting
Index: Src/params.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/params.c,v
retrieving revision 1.180
diff -p -u -r1.180 params.c
--- Src/params.c	13 Apr 2012 16:01:23 -0000	1.180
+++ Src/params.c	20 Apr 2012 19:15:07 -0000
@@ -3493,7 +3493,7 @@ arrayuniq_freenode(HashNode hn)
 }
 
 /**/
-static HashTable
+HashTable
 newuniqtable(zlong size)
 {
     HashTable ht = newhashtable((int)size, "arrayuniq", NULL);
Index: Src/subst.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/subst.c,v
retrieving revision 1.133
diff -p -u -r1.133 subst.c
--- Src/subst.c	29 Feb 2012 09:57:41 -0000	1.133
+++ Src/subst.c	20 Apr 2012 19:15:07 -0000
@@ -2872,6 +2872,49 @@ paramsubst(LinkList l, LinkNode n, char 
 	    }
 	    break;
 	}
+    } else if (inbrace && (*s == '|' || *s == Bar ||
+			   *s == '*' || *s == Star)) {
+	int intersect = (*s == '*' || *s == Star);
+	char **compare = getaparam(++s), **ap, **apsrc;
+	if (compare) {
+	    HashTable ht = newuniqtable(arrlen(compare)+1);
+	    int present;
+	    for (ap = compare; *ap; ap++)
+		(void)addhashnode2(ht, *ap, (HashNode)
+				   zhalloc(sizeof(struct hashnode)));
+	    if (!vunset && isarr) {
+		if (!copied) {
+		    aval = arrdup(aval);
+		    copied = 1;
+		}
+		for (ap = apsrc = aval; *apsrc; apsrc++) {
+		    untokenize(*apsrc);
+		    present = (gethashnode2(ht, *apsrc) != NULL);
+		    if (intersect ? present : !present) {
+			if (ap != apsrc) {
+			    *ap = *apsrc;
+			}
+			ap++;
+		    }
+		}
+		*ap = NULL;
+	    } else {
+		if (vunset) {
+		    if (unset(UNSET)) {
+			*idend = '\0';
+			zerr("%s: parameter not set", idbeg);
+			deletehashtable(ht);
+			return NULL;
+		    }
+		    val = dupstring("");
+		} else {
+		    present = (gethashnode2(ht, val) != NULL);
+		    if (intersect ? !present : present)
+			val = dupstring("");
+		}
+	    }
+	    deletehashtable(ht);
+	}
     } else {			/* no ${...=...} or anything, but possible modifiers. */
 	/*
 	 * Handler ${+...}.  TODO: strange, why do we handle this only
Index: Test/D04parameter.ztst
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/D04parameter.ztst,v
retrieving revision 1.64
diff -p -u -r1.64 D04parameter.ztst
--- Test/D04parameter.ztst	10 Apr 2012 01:17:03 -0000	1.64
+++ Test/D04parameter.ztst	20 Apr 2012 19:15:07 -0000
@@ -169,6 +169,29 @@
 >a-string-with-slashes
 >a-string-with-slashes
 
+  args=('one' '#foo' '(bar' "'three'" two)
+  mod=('#foo' '(bar' "'three'" sir_not_appearing_in_this_film)
+  print ${args:|mod}
+  print ${args:*mod}
+  print "${(@)args:|mod}"
+  print "${(@)args:*mod}"
+  args=(two words)
+  mod=('one word' 'two words')
+  print "${args:|mod}"
+  print "${args:*mod}"
+  scalar='two words'
+  print ${scalar:|mod}
+  print ${scalar:*mod}
+0:"|" array exclusion and "*" array intersection
+>one two
+>#foo (bar 'three'
+>one two
+>#foo (bar 'three'
+>
+>two words
+>
+>two words
+
   str1='twocubed'
   array=(the number of protons in an oxygen nucleus)
   print $#str1 ${#str1} "$#str1 ${#str1}" $#array ${#array} "$#array ${#array}"


-- 
Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx>
Web page now at http://homepage.ntlworld.com/p.w.stephenson/



Messages sorted by: Reverse Date, Date, Thread, Author