Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: PATCH: [key]=value syntax, work in progress



On Mon, 11 Sep 2017 21:51:15 +0100
Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx> wrote:
> Very early days in an attempt to support the [key]=value syntax in array
> assignment.
> 
> Please let me know now of anything that is going in the wrong direction
> or based on a misunderstanding or just plain wrong.

(I take silence to mean "grmf why are you even bothering to do this
frmkfplp", with consonantal clusters differing by local factors.)

Seems to be basically done.  Was expecting more of a challenge...

With a bit of luck (which we don't usually get), this shouldn't interact
too badly with anything there already, so I ought to be able to commit
this before I go on holiday at the weekend.

Please at least read the initial description paragraph to see if you
think it needs to work differently.

pws

diff --git a/Doc/Zsh/params.yo b/Doc/Zsh/params.yo
index 817496b..5a6c85b 100644
--- a/Doc/Zsh/params.yo
+++ b/Doc/Zsh/params.yo
@@ -91,13 +91,32 @@ cindex(array assignment)
 ifzman()
 indent(tt(set -A) var(name) var(value) ...)
 indent(var(name)tt(=LPAR())var(value) ...tt(RPAR()))
+indent(var(name)tt(=LPAR())tt([)var(key)tt(]=)var(value) ...tt(RPAR()))
+
+In the third form, var(key) is an expression that will be evaluated in
+arithmetic context (in its simplest form, an integer) that gives the
+index of the element to be assigned with var(value).  In this form any
+elements not explicitly mentioned that come before the largest index to
+which a value is assigned will be assigned an empty string. The indices
+may be in any order.  Note that this syntax is strict: tt([) and tt(]=) must
+not be quoted, while var(key) may not consist of the unquoted string
+tt(]=), but is otherwise treated as a simple string.  Furthermore, all
+elements must match this form or an error is generated; likewise, if the
+first entry does not match this form any later entry that does is taken
+as a simple value rather than a key / value pair. The enhanced forms of
+subscript expression that may be used when directly subscripting a
+variable name, described in the section Array Subscripts below, are not
+available.  Both var(key) and var(value) undergo all forms of expansion
+allowed for single word substitutions (this does not include filename
+generation).
 
 If no parameter var(name) exists, an ordinary array parameter is created.
 If the parameter var(name) exists and is a scalar, it is replaced by a new
 array.  To append to an array without changing the existing values, use
-the syntax:
+one of the following:
 ifzman()
 indent(var(name)tt(+=LPAR())var(value) ...tt(RPAR()))
+indent(var(name)tt(+=LPAR())tt([)var(key)tt(]=)var(value) ...tt(RPAR()))
 
 Within the parentheses on the right hand side of either form of the
 assignment, newlines and semicolons are treated the same as white space,
@@ -118,12 +137,14 @@ is interpreted as alternating keys and values:
 ifzman()
 indent(tt(set -A) var(name) var(key) var(value) ...)
 indent(var(name)tt(=LPAR())var(key) var(value) ...tt(RPAR()))
+indent(var(name)tt(=LPAR())tt([)var(key)tt(]=)var(value) ...tt(RPAR()))
 
 Every var(key) must have a var(value) in this case.  Note that this
 assigns to the entire array, deleting any elements that do not appear in
 the list.  The append syntax may also be used with an associative array:
 ifzman()
 indent(var(name)tt(+=LPAR())var(key) var(value) ...tt(RPAR()))
+indent(var(name)tt(+=LPAR())tt([)var(key)tt(]=)var(value) ...tt(RPAR()))
 
 This adds a new key/value pair if the key is not already present, and
 replaces the value for the existing key if it is.
diff --git a/Src/builtin.c b/Src/builtin.c
index 0c2a62a..f5ccf52 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -450,15 +450,35 @@ execbuiltin(LinkList args, LinkList assigns, Builtin bn)
 		    Asgment asg = (Asgment)node;
 		    fputc(' ', xtrerr);
 		    quotedzputs(asg->name, xtrerr);
-		    if (asg->is_array) {
-			LinkNode arrnode;
+		    if (asg->flags & ASG_ARRAY) {
 			fprintf(xtrerr, "=(");
 			if (asg->value.array) {
-			    for (arrnode = firstnode(asg->value.array);
-				 arrnode;
-				 incnode(arrnode)) {
-				fputc(' ', xtrerr);
-				quotedzputs((char *)getdata(arrnode), xtrerr);
+			    if (asg->flags & ASG_KEY_VALUE) {
+				LinkNode keynode, valnode;
+				keynode = firstnode(asg->value.array);
+				for (;;) {
+				    if (!keynode)
+					break;
+				    valnode = nextnode(keynode);
+				    if (!valnode)
+					break;
+				    fputc('[', xtrerr);
+				    quotedzputs((char *)getdata(keynode),
+						xtrerr);
+				    fprintf(stderr, "]=");
+				    quotedzputs((char *)getdata(valnode),
+						xtrerr);
+				    keynode = nextnode(valnode);
+				}
+			    } else {
+				LinkNode arrnode;
+				for (arrnode = firstnode(asg->value.array);
+				     arrnode;
+				     incnode(arrnode)) {
+				    fputc(' ', xtrerr);
+				    quotedzputs((char *)getdata(arrnode),
+						xtrerr);
+				}
 			    }
 			}
 			fprintf(xtrerr, " )");
@@ -1519,7 +1539,7 @@ bin_fc(char *nam, char **argv, Options ops, int func)
 	    asgl = a;
 	}
 	a->name = *argv;
-	a->is_array = 0;
+	a->flags = 0;
 	a->value.scalar = s;
 	a->node.next = a->node.prev = NULL;
 	argv++;
@@ -1910,7 +1930,7 @@ getasg(char ***argvp, LinkList assigns)
 	return NULL;
     }
     asg.name = s;
-    asg.is_array = 0;
+    asg.flags = 0;
 
     /* search for `=' */
     for (; *s && *s != '='; s++);
@@ -2171,7 +2191,7 @@ typeset_single(char *cname, char *pname, Param pm, UNUSED(int func),
      *   ii. we are creating a new local parameter
      */
     if (usepm) {
-	if (asg->is_array ?
+	if ((asg->flags & ASG_ARRAY) ?
 	    !(PM_TYPE(pm->node.flags) & (PM_ARRAY|PM_HASHED)) :
 	    (asg->value.scalar && (PM_TYPE(pm->node.flags &
 					   (PM_ARRAY|PM_HASHED))))) {
@@ -2241,10 +2261,11 @@ typeset_single(char *cname, char *pname, Param pm, UNUSED(int func),
 	    if (asg->value.scalar &&
 		!(pm = assignsparam(pname, ztrdup(asg->value.scalar), 0)))
 		return NULL;
-	} else if (asg->is_array) {
+	} else if (asg->flags & ASG_ARRAY) {
+	    int flags = (asg->flags & ASG_KEY_VALUE) ? ASSPM_KEY_VALUE : 0;
 	    if (!(pm = assignaparam(pname, asg->value.array ?
 				 zlinklist2array(asg->value.array) :
-				 mkarray(NULL), 0)))
+				 mkarray(NULL), flags)))
 		return NULL;
 	}
 	if (errflag)
@@ -2255,7 +2276,7 @@ typeset_single(char *cname, char *pname, Param pm, UNUSED(int func),
 	return pm;
     }
 
-    if (asg->is_array ?
+    if ((asg->flags & ASG_ARRAY) ?
 	!(on & (PM_ARRAY|PM_HASHED)) :
 	(asg->value.scalar && (on & (PM_ARRAY|PM_HASHED)))) {
 	zerrnam(cname, "%s: inconsistent type for assignment", pname);
@@ -2287,7 +2308,7 @@ typeset_single(char *cname, char *pname, Param pm, UNUSED(int func),
 	 */
 	if (!ASG_VALUEP(asg) && !((pm->node.flags|on) & (PM_ARRAY|PM_HASHED))) {
 	    asg->value.scalar = dupstring(getsparam(pname));
-	    asg->is_array = 0;
+	    asg->flags = 0;
 	}
 	/* pname may point to pm->nam which is about to disappear */
 	pname = dupstring(pname);
@@ -2396,13 +2417,14 @@ typeset_single(char *cname, char *pname, Param pm, UNUSED(int func),
 		      ztrdup(asg->value.scalar ? asg->value.scalar : ""), 0)))
 		return NULL;
 	    dont_set = 1;
-	    asg->is_array = 0;
+	    asg->flags = 0;
 	    keeplocal = 0;
 	    on = pm->node.flags;
 	} else if (PM_TYPE(on) == PM_ARRAY && ASG_ARRAYP(asg)) {
+	    int flags = (asg->flags & ASG_KEY_VALUE) ? ASSPM_KEY_VALUE : 0;
 	    if (!(pm = assignaparam(pname, asg->value.array ?
 				    zlinklist2array(asg->value.array) :
-				    mkarray(NULL), 0)))
+				    mkarray(NULL), flags)))
 		return NULL;
 	    dont_set = 1;
 	    keeplocal = 0;
@@ -2479,6 +2501,7 @@ typeset_single(char *cname, char *pname, Param pm, UNUSED(int func),
 	Param ipm = pm;
 	if (pm->node.flags & (PM_ARRAY|PM_HASHED)) {
 	    char **arrayval;
+	    int flags = (asg->flags & ASG_KEY_VALUE) ? ASSPM_KEY_VALUE : 0;
 	    if (!ASG_ARRAYP(asg)) {
 		/*
 		 * Attempt to assign a scalar value to an array.
@@ -2497,7 +2520,7 @@ typeset_single(char *cname, char *pname, Param pm, UNUSED(int func),
 		arrayval = zlinklist2array(asg->value.array);
 	    else
 		arrayval = mkarray(NULL);
-	    if (!(pm=assignaparam(pname, arrayval, 0)))
+	    if (!(pm=assignaparam(pname, arrayval, flags)))
 		return NULL;
 	} else {
 	    DPUTS(ASG_ARRAYP(asg), "BUG: inconsistent array value for scalar");
@@ -2750,13 +2773,15 @@ bin_typeset(char *name, char **argv, LinkList assigns, Options ops, int func)
 		     * Already tied in the fashion requested.
 		     */
 		    struct tieddata *tdp = (struct tieddata*)pm->u.data;
+		    int flags = (asg->flags & ASG_KEY_VALUE) ?
+			ASSPM_KEY_VALUE : 0;
 		    /* Update join character */
 		    tdp->joinchar = joinchar;
 		    if (asg0.value.scalar)
 			assignsparam(asg0.name, ztrdup(asg0.value.scalar), 0);
 		    else if (asg->value.array)
 			assignaparam(
-			    asg->name, zlinklist2array(asg->value.array), 0);
+			    asg->name, zlinklist2array(asg->value.array),flags);
 		    return 0;
 		} else {
 		    zwarnnam(name, "can't tie already tied scalar: %s",
@@ -2778,7 +2803,7 @@ bin_typeset(char *name, char **argv, LinkList assigns, Options ops, int func)
 	 * to be exported properly.
 	 */
 	asg2.name = asg->name;
-	asg2.is_array = 0;
+	asg2.flags = 0;
 	asg2.value.array = (LinkList)0;
 	if (!(apm=typeset_single(name, asg->name,
 				 (Param)paramtab->getnode(paramtab,
@@ -2816,9 +2841,10 @@ bin_typeset(char *name, char **argv, LinkList assigns, Options ops, int func)
 	if (apm->ename)
 	    zsfree(apm->ename);
 	apm->ename = ztrdup(asg0.name);
-	if (asg->value.array)
-	    assignaparam(asg->name, zlinklist2array(asg->value.array), 0);
-	else if (oldval)
+	if (asg->value.array) {
+	    int flags = (asg->flags & ASG_KEY_VALUE) ? ASSPM_KEY_VALUE : 0;
+	    assignaparam(asg->name, zlinklist2array(asg->value.array), flags);
+	} else if (oldval)
 	    assignsparam(asg0.name, oldval, 0);
 	unqueue_signals();
 
diff --git a/Src/exec.c b/Src/exec.c
index e2432fd..d136766 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -2389,6 +2389,60 @@ addfd(int forked, int *save, struct multio **mfds, int fd1, int fd2, int rflag,
     }
 }
 
+/* Check for array assignment with entries like [key]=val.
+ *
+ * If the first entry lacks this form, return 0 with vl untouched; if a
+ * later one does, report it with zerr() and return 0 (vl partly rewritten).
+ * Otherwise rewrite vl in place as alternating key / val entries, applying
+ * singsub() to each part if htok is set and untokenize() always; return 1.
+ *
+ * Caller to check errflag.
+ */
+
+/**/
+static int
+keyvalpairarray(LinkList vl, int htok)
+{
+    char *start, *end, *dat;
+    LinkNode ve, next;
+
+    if (vl &&
+	(ve = firstnode(vl)) &&
+	(start = (char *)getdata(ve)) &&
+	start[0] == Inbrack &&
+	(end = strchr(start+1, Outbrack)) &&
+	end[1] == Equals) {
+	for (;;) {
+	    *end = '\0';	/* cut at Outbrack: start+1 is now the key */
+	    next = nextnode(ve);	/* fetched before the insert below */
+
+	    dat = start + 1;	/* key text, just past Inbrack */
+	    if (htok)
+		singsub(&dat);
+	    untokenize(dat);
+	    setdata(ve, dat);	/* existing node now holds the key */
+	    dat = end + 2;	/* value text, past Outbrack and Equals */
+	    if (htok)
+		singsub(&dat);
+	    untokenize(dat);
+	    insertlinknode(vl, ve, dat); /* presumably goes right after ve */
+	    ve = next;
+	    if (!ve)
+		break;
+	    /* Every remaining entry must have the same [key]=value shape */
+	    if (!(start = (char *)getdata(ve)) ||
+		start[0] != Inbrack ||
+		!(end = strchr(start+1, Outbrack)) ||
+		end[1] != Equals) {
+		zerr("bad array element, expected [key]=value: %s",
+		     start);
+		return 0;
+	    }
+	}
+	return 1;
+    }
+    return 0;
+}
+
 /**/
 static void
 addvars(Estate state, Wordcode pc, int addflags)
@@ -2428,8 +2482,17 @@ addvars(Estate state, Wordcode pc, int addflags)
 	if ((isstr = (WC_ASSIGN_TYPE(ac) == WC_ASSIGN_SCALAR))) {
 	    init_list1(svl, ecgetstr(state, EC_DUPTOK, &htok));
 	    vl = &svl;
-	} else
+	} else {
 	    vl = ecgetlist(state, WC_ASSIGN_NUM(ac), EC_DUPTOK, &htok);
+	    if (keyvalpairarray(vl, htok)) {
+		myflags |= ASSPM_KEY_VALUE;
+		htok = 0;
+	    }
+	    if (errflag) {
+		state->pc = opc;
+		return;
+	    }
+	}
 
 	if (vl && htok) {
 	    prefork(vl, (isstr ? (PREFORK_SINGLE|PREFORK_ASSIGN) :
@@ -3914,7 +3977,7 @@ execcmd_exec(Estate state, Execcmd_params eparams,
 				while ((data = ugetnode(&svl))) {
 				    char *ptr;
 				    asg = (Asgment)zhalloc(sizeof(struct asgment));
-				    asg->is_array = 0;
+				    asg->flags = 0;
 				    if ((ptr = strchr(data, '='))) {
 					*ptr++ = '\0';
 					asg->name = data;
@@ -3936,7 +3999,7 @@ execcmd_exec(Estate state, Execcmd_params eparams,
 			asg->name = name;
 			if (WC_ASSIGN_TYPE(ac) == WC_ASSIGN_SCALAR) {
 			    char *val = ecgetstr(state, EC_DUPTOK, &htok);
-			    asg->is_array = 0;
+			    asg->flags = 0;
 			    if (WC_ASSIGN_TYPE2(ac) == WC_ASSIGN_INC) {
 				/* Fake assignment, no value */
 				asg->value.scalar = NULL;
@@ -3961,18 +4024,23 @@ execcmd_exec(Estate state, Execcmd_params eparams,
 				asg->value.scalar = val;
 			    }
 			} else {
-			    asg->is_array = 1;
+			    asg->flags = ASG_ARRAY;
 			    asg->value.array =
 				ecgetlist(state, WC_ASSIGN_NUM(ac),
 					  EC_DUPTOK, &htok);
 			    if (asg->value.array)
 			    {
-				prefork(asg->value.array, PREFORK_ASSIGN, NULL);
-				if (errflag) {
-				    state->pc = opc;
-				    break;
+				if (keyvalpairarray(asg->value.array, 1))
+				    asg->flags |= ASG_KEY_VALUE;
+				else if (!errflag) {
+				    prefork(asg->value.array, PREFORK_ASSIGN,
+					    NULL);
+				    if (errflag) {
+					state->pc = opc;
+					break;
+				    }
+				    globlist(asg->value.array, 0);
 				}
-				globlist(asg->value.array, 0);
 				if (errflag) {
 				    state->pc = opc;
 				    break;
diff --git a/Src/params.c b/Src/params.c
index 6fbee88..05989a6 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -3185,6 +3185,65 @@ assignaparam(char *s, char **val, int flags)
 
     if (flags & ASSPM_WARN)
 	check_warn_pm(v->pm, "array", created, may_warn_about_nested_vars);
+
+    if ((flags & ASSPM_KEY_VALUE) && (PM_TYPE(v->pm->node.flags) & PM_ARRAY)) {
+	int maxlen, origlen;
+	char **aptr, **fullval, *dummy;
+	zlong *subscripts = (zlong *)zhalloc(arrlen(val) * sizeof(zlong));
+	zlong *iptr = subscripts;
+	if (flags & ASSPM_AUGMENT) {
+	    maxlen = origlen = arrlen(v->pm->gsu.a->getfn(v->pm));
+	} else {
+	    maxlen = origlen = 0;
+	}
+	for (aptr = val; *aptr && aptr[1]; aptr += 2) {
+	    *iptr = mathevalarg(*aptr, &dummy);
+	    if (*iptr < 0 ||
+		(!isset(KSHARRAYS) && *iptr == 0)) {
+		unqueue_signals();
+		zerr("bad subscript for direct array assignment: %s", *aptr);
+		return NULL;
+	    }
+	    if (!isset(KSHARRAYS))
+		--*iptr;
+	    if (*iptr + 1 > maxlen)
+		maxlen = *iptr + 1;
+	    ++iptr;
+	}
+	fullval = zshcalloc((maxlen+1) * sizeof(char *));
+	fullval[maxlen] = NULL;
+	if (flags & ASSPM_AUGMENT) {
+	    char **srcptr = v->pm->gsu.a->getfn(v->pm);
+	    for (aptr = fullval; aptr <= fullval + maxlen; aptr++) {
+		*aptr = ztrdup(*srcptr); 
+		srcptr++;
+	    }
+	}
+	iptr = subscripts;
+	for (aptr = val; *aptr && aptr[1]; aptr += 2) {
+	    zsfree(*aptr);
+	    fullval[*iptr] = aptr[1];
+	    ++iptr;
+	}
+	if (*aptr) {		/* Shouldn't be possible */
+	    DPUTS(1, "Extra element in key / value array");
+	    zsfree(*aptr);
+	}
+	free(val);
+	for (aptr = fullval; aptr < fullval + maxlen; aptr++) {
+	    /*
+	     * Remember we don't have sparse arrays, and they're null
+	     * terminated --- so any value we don't set has to be an
+	     * empty string.
+	     */
+	    if (!*aptr)
+		*aptr = ztrdup("");
+	}
+	setarrvalue(v, fullval);
+	unqueue_signals();
+	return v->pm;
+    }
+
     if (flags & ASSPM_AUGMENT) {
     	if (v->start == 0 && v->end == -1) {
 	    if (PM_TYPE(v->pm->node.flags) & PM_ARRAY) {
diff --git a/Src/zsh.h b/Src/zsh.h
index 1e982a6..27642f2 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -1217,17 +1217,25 @@ struct alias {
 struct asgment {
     struct linknode node;
     char *name;
-    int is_array;
+    int flags;
     union {
 	char *scalar;
 	LinkList array;
     } value;
 };
 
+/* Bit flags for the flags member of struct asgment */
+enum {
+    /* Value is an array (value.array), not a scalar */
+    ASG_ARRAY = 1,
+    /* Array came from [key]=value entries (alternating key, value) */
+    ASG_KEY_VALUE = 2
+};
+
 /*
  * Assignment is array?
  */
-#define ASG_ARRAYP(asg) ((asg)->is_array)
+#define ASG_ARRAYP(asg) ((asg)->flags & ASG_ARRAY)
 
 /*
  * Assignment has value?
@@ -2060,6 +2068,11 @@ enum {
     ASSPM_WARN = (ASSPM_WARN_CREATE|ASSPM_WARN_NESTED),
     /* Import from environment, so exercise care evaluating value */
     ASSPM_ENV_IMPORT = 1 << 3,
+    /* Array is key / value pairs.
+     * This is normal for associative arrays but variant behaviour for
+     * normal arrays.
+     */
+    ASSPM_KEY_VALUE = 1 << 4
 };
 
 /* node for named directory hash table (nameddirtab) */
diff --git a/Test/B02typeset.ztst b/Test/B02typeset.ztst
index b27bb4f..ae21804 100644
--- a/Test/B02typeset.ztst
+++ b/Test/B02typeset.ztst
@@ -721,3 +721,58 @@
 # 'date' did not run.
 >Status is printed, 1
 *?*: failed to change user ID: *
+
+ typeset -A keyvalhash=([one]=eins [two]=zwei)
+ keyvalhash+=([three]=drei)
+ for key in ${(ok)keyvalhash}; do
+   print $key $keyvalhash[$key]
+ done
+0:[key]=val for hashes
+>one eins
+>three drei
+>two zwei
+
+  local keyvalarray=([1]=one [3]=three)
+  print -l "${keyvalarray[@]}"
+  keyvalarray+=([2]=two)
+  print -l "${keyvalarray[@]}"
+  local keyvalarray=([1]=one [3]=three)
+  print -l "${keyvalarray[@]}"
+0:[key]=val for normal arrays
+>one
+>
+>three
+>one
+>two
+>three
+>one
+>
+>three
+
+ touch foo Xnot_globbedX
+ inkey="another key" val="another value"
+ typeset -A keyvalhash=([$(echo the key)]=$(echo the value)
+                        [$inkey]=$val
+	                [*]=?not_globbed?)
+ for key in ${(ok)keyvalhash}; do
+   print -l $key $keyvalhash[$key]
+ done
+ typeset -A keyvalhash=([$(echo the key)]=$(echo the value)
+                        [$inkey]=$val
+	                [*]=?not_globbed?)
+ for key in ${(ok)keyvalhash}; do
+   print -l $key $keyvalhash[$key]
+ done
+0:Substitution in [key]=val syntax
+>*
+>?not_globbed?
+>another key
+>another value
+>the key
+>the value
+>*
+>?not_globbed?
+>another key
+>another value
+>the key
+>the value
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index 8dbc1e8..723f081 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -2207,3 +2207,43 @@ F:behavior, see http://austingroupbugs.net/view.php?id=888
 0:(z) splitting with remaining tokens
 >foo-bar*thingy?
  
+ typeset -A keyvalhash
+ keyvalhash=([one]=eins [two]=zwei)
+ keyvalhash+=([three]=drei)
+ for key in ${(ok)keyvalhash}; do
+   print $key $keyvalhash[$key]
+ done
+0:[key]=val for hashes
+>one eins
+>three drei
+>two zwei
+
+  local keyvalarray
+  keyvalarray=([1]=one [3]=three)
+  print -l "${keyvalarray[@]}"
+  keyvalarray+=([2]=two)
+  print -l "${keyvalarray[@]}"
+0:[key]=val for normal arrays
+>one
+>
+>three
+>one
+>two
+>three
+
+ typeset -A keyvalhash
+ touch foo Xnot_globbedX
+ key="another key" val="another value"
+ keyvalhash=([$(echo the key)]=$(echo the value)
+             [$key]=$val
+	     [*]=?not_globbed?)
+ for key in ${(ok)keyvalhash}; do
+   print -l $key $keyvalhash[$key]
+ done
+0:Substitution in [key]=val syntax
+>*
+>?not_globbed?
+>another key
+>another value
+>the key
+>the value



Messages sorted by: Reverse Date, Date, Thread, Author