Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: mixed [key]=val and native syntax



Please do suggest any hairier tests for this you can think of.

The basic idea is that we detect [key]=val element by element in
prefork(), and for each one detected stick a special marker in front; if
any are found a flag is passed up so that native array assignment with
no such entries is only minimally penalised.  I think this is good
enough to handle *all* cases.

I don't think the new syntax needs to be blisteringly fast anyway, so
I'm not currently intending to optimise further.

One possible enhancement: I'm not yet bothering to check for

typeset -A assoc=(key1 [key2_or_maybe_not_oops]=val2_or_maybe_not_oops oops_again)

so it gets nodded through.  I think the answer may be "so sue me".  But
not literally.

pws

commit 27f0fed3d3efc3c0854287a5f4f992bba320efa9
Author: Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx>
Date:   Sat Sep 23 18:17:51 2017 +0100

    Updates for ksh array element syntax.
    
    Move detection of key/value pairs down into prefork().
    
    Detect normal array assignment and [key]=val array assignment
    separately.  Mark key / value pairs with Marker and pass up flag.  Deal
    with marked triads specially later on.

diff --git a/Src/Zle/zle_tricky.c b/Src/Zle/zle_tricky.c
index 5a9cccb..caeef76 100644
--- a/Src/Zle/zle_tricky.c
+++ b/Src/Zle/zle_tricky.c
@@ -2268,7 +2268,7 @@ doexpansion(char *s, int lst, int olst, int explincmd)
 	int ng = opts[NULLGLOB];
 
 	opts[NULLGLOB] = 1;
-	globlist(vl, 1);
+	globlist(vl, PREFORK_NO_UNTOK);
 	opts[NULLGLOB] = ng;
     }
     if (errflag)
diff --git a/Src/exec.c b/Src/exec.c
index 31edfab..bd242d1 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -2389,60 +2389,6 @@ addfd(int forked, int *save, struct multio **mfds, int fd1, int fd2, int rflag,
     }
 }
 
-/* Check for array assignent with entries like [key]=val.
- *
- * All entries or none must match this form, else error and return 0.
- *
- * Convert list to alternate key / val form, perform
- * appropriate substitution, and return 1 if found.
- *
- * Caller to check errflag.
- */
-
-/**/
-static int
-keyvalpairarray(LinkList vl, int htok)
-{
-    char *start, *end, *dat;
-    LinkNode ve, next;
-
-    if (vl &&
-	(ve = firstnode(vl)) &&
-	(start = (char *)getdata(ve)) &&
-	start[0] == Inbrack &&
-	(end = strchr(start+1, Outbrack)) &&
-	end[1] == Equals) {
-	for (;;) {
-	    *end = '\0';
-	    next = nextnode(ve);
-
-	    dat = start + 1;
-	    if (htok)
-		singsub(&dat);
-	    untokenize(dat);
-	    setdata(ve, dat);
-	    dat = end + 2;
-	    if (htok)
-		singsub(&dat);
-	    untokenize(dat);
-	    insertlinknode(vl, ve, dat);
-	    ve = next;
-	    if (!ve)
-		break;
-	    if (!(start = (char *)getdata(ve)) ||
-		start[0] != Inbrack ||
-		!(end = strchr(start+1, Outbrack)) ||
-		end[1] != Equals) {
-		zerr("bad array element, expected [key]=value: %s",
-		     start);
-		return 0;
-	    }
-	}
-	return 1;
-    }
-    return 0;
-}
-
 /**/
 static void
 addvars(Estate state, Wordcode pc, int addflags)
@@ -2484,10 +2430,6 @@ addvars(Estate state, Wordcode pc, int addflags)
 	    vl = &svl;
 	} else {
 	    vl = ecgetlist(state, WC_ASSIGN_NUM(ac), EC_DUPTOK, &htok);
-	    if (keyvalpairarray(vl, htok)) {
-		myflags |= ASSPM_KEY_VALUE;
-		htok = 0;
-	    }
 	    if (errflag) {
 		state->pc = opc;
 		return;
@@ -2495,25 +2437,28 @@ addvars(Estate state, Wordcode pc, int addflags)
 	}
 
 	if (vl && htok) {
+	    int prefork_ret = 0;
 	    prefork(vl, (isstr ? (PREFORK_SINGLE|PREFORK_ASSIGN) :
-			 PREFORK_ASSIGN), NULL);
+			 PREFORK_ASSIGN), &prefork_ret);
 	    if (errflag) {
 		state->pc = opc;
 		return;
 	    }
+	    if (prefork_ret & PREFORK_KEY_VALUE)
+		myflags |= ASSPM_KEY_VALUE;
 	    if (!isstr || (isset(GLOBASSIGN) && isstr &&
 			   haswilds((char *)getdata(firstnode(vl))))) {
-		globlist(vl, 0);
+		globlist(vl, prefork_ret);
 		/* Unset the parameter to force it to be recreated
 		 * as either scalar or array depending on how many
 		 * matches were found for the glob.
 		 */
 		if (isset(GLOBASSIGN) && isstr)
-		    unsetparam(name);
-	    }
-	    if (errflag) {
-		state->pc = opc;
-		return;
+			unsetparam(name);
+		if (errflag) {
+		    state->pc = opc;
+		    return;
+		}
 	    }
 	}
 	if (isstr && (empty(vl) || !nextnode(firstnode(vl)))) {
@@ -4030,16 +3975,17 @@ execcmd_exec(Estate state, Execcmd_params eparams,
 					  EC_DUPTOK, &htok);
 			    if (asg->value.array)
 			    {
-				if (keyvalpairarray(asg->value.array, 1))
-				    asg->flags |= ASG_KEY_VALUE;
-				else if (!errflag) {
+				if (!errflag) {
+				    int prefork_ret = 0;
 				    prefork(asg->value.array, PREFORK_ASSIGN,
-					    NULL);
+					    &prefork_ret);
 				    if (errflag) {
 					state->pc = opc;
 					break;
 				    }
-				    globlist(asg->value.array, 0);
+				    if (prefork_ret & PREFORK_KEY_VALUE)
+					asg->flags |= ASG_KEY_VALUE;
+				    globlist(asg->value.array, prefork_ret);
 				}
 				if (errflag) {
 				    state->pc = opc;
diff --git a/Src/params.c b/Src/params.c
index d628ddf..be809c4 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -2704,7 +2704,7 @@ setarrvalue(Value v, char **val)
 	    v->pm->gsu.a->setfn(v->pm, val);
     } else if (v->start == -1 && v->end == 0 &&
     	    PM_TYPE(v->pm->node.flags) == PM_HASHED) {
-    	arrhashsetfn(v->pm, val, 1);
+    	arrhashsetfn(v->pm, val, ASSPM_AUGMENT);
     } else if ((PM_TYPE(v->pm->node.flags) == PM_HASHED)) {
 	freearray(val);
 	zerr("%s: attempt to set slice of associative array",
@@ -3186,11 +3186,24 @@ assignaparam(char *s, char **val, int flags)
     if (flags & ASSPM_WARN)
 	check_warn_pm(v->pm, "array", created, may_warn_about_nested_vars);
 
+    /*
+     * At this point, we may have array entries consisting of
+     * - a Marker element --- normally allocated array entry but
+     *   with just Marker char and null
+     * - an array index element --- as normal for associative array,
+     *   but non-standard for normal array which we handle now.
+     * - a value for the indexed element.
+     * This only applies if the flag ASSPM_KEY_VALUE is passed in,
+     * indicating prefork() detected this syntax.
+     *
+     * For associative arrays we just junk the Marker elements when
+     * we perform the assignment.
+     */
     if ((flags & ASSPM_KEY_VALUE) && (PM_TYPE(v->pm->node.flags) & PM_ARRAY)) {
 	/*
 	 * This is an ordinary array with key / value pairs.
 	 */
-	int maxlen, origlen;
+	int maxlen, origlen, nextind;
 	char **aptr, **fullval;
 	zlong *subscripts = (zlong *)zhalloc(arrlen(val) * sizeof(zlong));
 	zlong *iptr = subscripts;
@@ -3199,19 +3212,27 @@ assignaparam(char *s, char **val, int flags)
 	} else {
 	    maxlen = origlen = 0;
 	}
-	for (aptr = val; *aptr && aptr[1]; aptr += 2) {
-	    *iptr = mathevali(*aptr);
-	    if (*iptr < 0 ||
-		(!isset(KSHARRAYS) && *iptr == 0)) {
-		unqueue_signals();
-		zerr("bad subscript for direct array assignment: %s", *aptr);
-		return NULL;
+	nextind = 0;
+	for (aptr = val; *aptr; ) {
+	    if (**aptr == Marker) {
+		*iptr = mathevali(*++aptr);
+		if (*iptr < 0 ||
+		    (!isset(KSHARRAYS) && *iptr == 0)) {
+		    unqueue_signals();
+		    zerr("bad subscript for direct array assignment: %s", *aptr);
+		    return NULL;
+		}
+		if (!isset(KSHARRAYS))
+		    --*iptr;
+		nextind = *iptr + 1;
+		++iptr;
+		aptr += 2;
+	    } else {
+		++nextind;
+		++aptr;
 	    }
-	    if (!isset(KSHARRAYS))
-		--*iptr;
-	    if (*iptr + 1 > maxlen)
-		maxlen = *iptr + 1;
-	    ++iptr;
+	    if (nextind > maxlen)
+		maxlen = nextind;
 	}
 	fullval = zshcalloc((maxlen+1) * sizeof(char *));
 	if (!fullval) {
@@ -3227,10 +3248,19 @@ assignaparam(char *s, char **val, int flags)
 	    }
 	}
 	iptr = subscripts;
-	for (aptr = val; *aptr && aptr[1]; aptr += 2) {
-	    zsfree(*aptr);
-	    fullval[*iptr] = aptr[1];
-	    ++iptr;
+	nextind = 0;
+	for (aptr = val; *aptr; ++aptr) {
+	    if (**aptr == Marker) {
+		zsfree(*aptr);
+		zsfree(*++aptr); /* Index, no longer needed */
+		fullval[*iptr] = *++aptr;
+		nextind = *iptr + 1;
+		++iptr;
+	    } else {
+		fullval[nextind] = *aptr;
+		++nextind;
+	    }
+	    /* aptr now on value in both cases */
 	}
 	if (*aptr) {		/* Shouldn't be possible */
 	    DPUTS(1, "Extra element in key / value array");
@@ -3741,30 +3771,38 @@ nullsethashfn(UNUSED(Param pm), HashTable x)
 /* Function to set value of an association parameter using key/value pairs */
 
 /**/
-mod_export void
-arrhashsetfn(Param pm, char **val, int augment)
+static void
+arrhashsetfn(Param pm, char **val, int flags)
 {
     /* Best not to shortcut this by using the existing hash table,   *
      * since that could cause trouble for special hashes.  This way, *
      * it's up to pm->gsu.h->setfn() what to do.                     */
-    int alen = arrlen(val);
+    int alen = 0;
     HashTable opmtab = paramtab, ht = 0;
-    char **aptr = val;
+    char **aptr;
     Value v = (Value) hcalloc(sizeof *v);
     v->end = -1;
 
+    for (aptr = val; *aptr; ++aptr)
+    {
+	if (**aptr != Marker)
+	    ++alen;
+    }
+
     if (alen % 2) {
 	freearray(val);
 	zerr("bad set of key/value pairs for associative array");
 	return;
     }
-    if (augment) {
+    if (flags & ASSPM_AUGMENT) {
 	ht = paramtab = pm->gsu.h->getfn(pm);
     }
-    if (alen && (!augment || !paramtab)) {
+    if (alen && (!(flags & ASSPM_AUGMENT) || !paramtab)) {
 	ht = paramtab = newparamtable(17, pm->node.nam);
     }
-    while (*aptr) {
+    for (aptr = val; *aptr; ) {
+	if (**aptr == Marker)
+	    zsfree(*aptr++);
 	/* The parameter name is ztrdup'd... */
 	v->pm = createparam(*aptr, PM_SCALAR|PM_UNSET);
 	/*
@@ -3774,6 +3812,10 @@ arrhashsetfn(Param pm, char **val, int augment)
 	if (!v->pm)
 	    v->pm = (Param) paramtab->getnode(paramtab, *aptr);
 	zsfree(*aptr++);
+	if (**aptr == Marker) {
+	    /* TBD: Tsk, shouldn't be allowed. */
+	    zsfree(*aptr++);
+	}
 	/* ...but we can use the value without copying. */
 	setstrvalue(v, *aptr++);
     }
diff --git a/Src/subst.c b/Src/subst.c
index 5df2a8b..357dc91 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -35,6 +35,41 @@
 /**/
 char nulstring[] = {Nularg, '\0'};
 
+/* Check for array assignment with entries like [key]=val.
+ *
+ * Insert Marker node, convert following nodes to list to alternate key
+ * / val form, perform appropriate substitution, and return last
+ * inserted (value) node if found.
+ *
+ * Caller to check errflag.
+ */
+
+/**/
+static LinkNode
+keyvalpairelement(LinkList list, LinkNode node)
+{
+    char *start, *end, *dat;
+
+    if ((start = (char *)getdata(node)) &&
+	start[0] == Inbrack &&
+	(end = strchr(start+1, Outbrack)) &&
+	end[1] == Equals) {
+	static char marker[2] = { Marker, '\0' };
+	*end = '\0';
+
+	dat = start + 1;
+	singsub(&dat);
+	untokenize(dat);
+	setdata(node, marker);
+	node = insertlinknode(list, node, dat);
+	dat = end + 2;
+	singsub(&dat);
+	untokenize(dat);
+	return insertlinknode(list, node, dat);
+    }
+    return NULL;
+}
+
 /* Do substitutions before fork. These are:
  *  - Process substitution: <(...), >(...), =(...)
  *  - Parameter substitution
@@ -46,17 +81,16 @@ char nulstring[] = {Nularg, '\0'};
  *
  * "flag"s contains PREFORK_* flags, defined in zsh.h.
  *
- * "ret_flags" is used to return values from nested parameter
- * substitions.  It may be NULL in which case PREFORK_SUBEXP
- * must not appear in flags; any return value from below
- * will be discarded.
+ * "ret_flags" is used to return PREFORK_* values from nested parameter
+ * substitutions.  It may be NULL in which case PREFORK_SUBEXP must not
+ * appear in flags; any return value from below will be discarded.
  */
 
 /**/
 mod_export void
 prefork(LinkList list, int flags, int *ret_flags)
 {
-    LinkNode node, stop = 0;
+    LinkNode node, insnode, stop = 0;
     int keep = 0, asssub = (flags & PREFORK_TYPESET) && isset(KSHTYPESET);
     int ret_flags_local = 0;
     if (!ret_flags)
@@ -64,6 +98,14 @@ prefork(LinkList list, int flags, int *ret_flags)
 
     queue_signals();
     for (node = firstnode(list); node; incnode(node)) {
+	if ((flags & (PREFORK_SINGLE|PREFORK_ASSIGN)) == PREFORK_ASSIGN &&
+	    (insnode = keyvalpairelement(list, node))) {
+	    node = insnode;
+	    *ret_flags |= PREFORK_KEY_VALUE;
+	    continue;
+	}
+	if (errflag)
+	    return;
 	if (isset(SHFILEEXPANSION)) {
 	    /*
 	     * Here and below we avoid taking the address
@@ -400,16 +442,31 @@ quotesubst(char *str)
     return str;
 }
 
+/* Glob entries of a linked list.
+ *
+ * flags are from PREFORK_*, but only two are handled:
+ * - PREFORK_NO_UNTOK: pass into zglob() a flag saying do not untokenise.
+ * - PREFORK_KEY_VALUE: look out for Marker / Key / Value list triads
+ *   and don't glob them.  The key and value should already have
+ *   been untokenised as they are not subject to further expansion.
+ */
+
 /**/
 mod_export void
-globlist(LinkList list, int nountok)
+globlist(LinkList list, int flags)
 {
     LinkNode node, next;
 
     badcshglob = 0;
     for (node = firstnode(list); !errflag && node; node = next) {
 	next = nextnode(node);
-	zglob(list, node, nountok);
+	if ((flags & PREFORK_KEY_VALUE) &&
+	    *(char *)getdata(node) == Marker) {
+	    /* Skip key / value pair */
+	    next = nextnode(nextnode(next));
+	} else {
+	    zglob(list, node, (flags & PREFORK_NO_UNTOK) != 0);
+	}
     }
     if (noerrs)
 	badcshglob = 0;
diff --git a/Src/zsh.h b/Src/zsh.h
index 27642f2..7f0e8a2 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -223,9 +223,14 @@ struct mathfunc {
  * tokens here.
  */
 /*
- * Marker used in paramsubst for rc_expand_param.
- * Also used in pattern character arrays as guaranteed not to
- * mark a character in a string.
+ * Marker is used in the following special circumstances:
+ * - In paramsubst for rc_expand_param.
+ * - In pattern character arrays as guaranteed not to mark a character in
+ *   a string.
+ * - In assignments with the ASSPM_KEY_VALUE flag set in order to
+ *   mark that there is a key / value pair following.
+ * All the above are local uses --- any case where the Marker has
+ * escaped beyond the context in question is an error.
  */
 #define Marker		((char) 0xa2)
 
@@ -1969,7 +1974,14 @@ enum {
     /* SHWORDSPLIT forced off in nested subst */
     PREFORK_NOSHWORDSPLIT = 0x20,
     /* Prefork is part of a parameter subexpression */
-    PREFORK_SUBEXP        = 0x40
+    PREFORK_SUBEXP        = 0x40,
+    /* Prefork detected an assignment list with [key]=value syntax,
+     * Only used on return from prefork, not meaningful passed down.
+     * Also used as flag to globlist.
+     */
+    PREFORK_KEY_VALUE     = 0x80,
+    /* No untokenise: used only as flag to globlist */
+    PREFORK_NO_UNTOK      = 0x100
 };
 
 /*
diff --git a/Test/B02typeset.ztst b/Test/B02typeset.ztst
index 7923ae3..d61d534 100644
--- a/Test/B02typeset.ztst
+++ b/Test/B02typeset.ztst
@@ -778,3 +778,25 @@
 >the key
 >the value
 >typeset -A keyvalhash=( ['*']='?not_globbed?' ['another key']='another value' ['the key']='the value' )
+
+  local keyvalarray=(first [2]=second third [6]=sixth seventh [5]=fifth new_sixth)
+  print -l "${keyvalarray[@]}"
+0:mixed syntax [key]=val with normal arrays
+>first
+>second
+>third
+>
+>fifth
+>new_sixth
+>seventh
+
+  # Not particularly useful, but should work
+  local -A keyvalhash=(1 one [2]=two 3 three)
+  integer i
+  for i in {1..${#keyvalhash}}; do
+    print $keyvalhash[$i]
+  done
+0:Mixed syntax with [key]=val for hash.
+>one
+>two
+>three
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index 367bca1..b21d2d0 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -2275,3 +2275,37 @@ F:behavior, see http://austingroupbugs.net/view.php?id=888
 >third value
 >4fourth element\
 >fourth value
+
+  local keyvalarray
+  keyvalarray=(first [2]=second third [6]=sixth seventh [5]=fifth new_sixth)
+  print -l "${keyvalarray[@]}"
+0:mixed syntax [key]=val with normal arrays
+>first
+>second
+>third
+>
+>fifth
+>new_sixth
+>seventh
+
+  # Not particularly useful, but should work
+  local -A keyvalhash
+  keyvalhash=(1 one [2]=two 3 three)
+  integer i
+  for i in {1..${#keyvalhash}}; do
+    print $keyvalhash[$i]
+  done
+0:Mixed syntax with [key]=val for hash.
+>one
+>two
+>three
+
+  touch KVA1one KVA2two KVA3three
+  local keyvalarray
+  keyvalarray=(KVA* [4]=*)
+  print -l "${keyvalarray[@]}"
+0:Globbing in non-[key]=val parts of mixed syntax.
+>KVA1one
+>KVA2two
+>KVA3three
+>*



Messages sorted by: Reverse Date, Date, Thread, Author