Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

[PATCH] TYPESET_TO_UNSET + misc.



Attached is the final (I hope) version of the changes I've been
pushing to the "declarednull" branch in git.  tl;dr with this option
set "typeset foo" leaves foo unset, in contrast to the default
behavior which assigns foo="".

For those who've lost track, this originated from the thread "Bug with
unset variables" in workers/47351 and the first pass at this approach
appeared in workers/47697.  The doc update includes mention of the
problems with searching for (or in) empty strings, from the thread at
workers/47729, and the new test file E03posix.ztst has "xfail" tests
for nine incompatibilities that were recently enumerated in a number
of messages on the list.  I considered making a couple of patches but
one would have been very small and the other would patch a
newly-created file, so it didn't seem worthwhile.

I chose TYPESET_TO_UNSET as the option to control this, to go along
with TYPESET_SILENT and because it works a bit better when negated as
NO_TYPESET_TO_UNSET.  E03posix.ztst runs with this option set to avoid
having to duplicate all the tests; that can be fixed later if
necessary.

I documented the default initialization behavior to contrast it with
the TYPESET_TO_UNSET behavior, rather than try to explain the latter
in isolation.
diff --git a/Completion/compinit b/Completion/compinit
index e81cd1604..1f2e7c634 100644
--- a/Completion/compinit
+++ b/Completion/compinit
@@ -165,6 +165,7 @@ _comp_options=(
     NO_posixidentifiers
     NO_shwordsplit
     NO_shglob
+    NO_typesettounset
     NO_warnnestedvar
     NO_warncreateglobal
 )
diff --git a/Doc/Zsh/builtins.yo b/Doc/Zsh/builtins.yo
index a7afe42cf..61dc6986f 100644
--- a/Doc/Zsh/builtins.yo
+++ b/Doc/Zsh/builtins.yo
@@ -1872,7 +1872,11 @@ ifnzman(noderef(Local Parameters))\
 retain their special attributes when made local.
 
 For each var(name)tt(=)var(value) assignment, the parameter
-var(name) is set to var(value).
+var(name) is set to var(value).  If the assignment is omitted and var(name)
+does em(not) refer to an existing parameter, a new parameter is initialized
+to empty string, zero, or empty array (as appropriate), em(unless) the
+shell option tt(TYPESET_TO_UNSET) is set.  When that option is set,
+the parameter attributes are recorded but the parameter remains unset.
 
 If the shell option tt(TYPESET_SILENT) is not set, for each remaining
 var(name) that refers to a parameter that is already set, the name and
diff --git a/Doc/Zsh/options.yo b/Doc/Zsh/options.yo
index 714e8a1a1..6e862fae8 100644
--- a/Doc/Zsh/options.yo
+++ b/Doc/Zsh/options.yo
@@ -1942,6 +1942,16 @@ If the option is set, they will only be shown when parameters are selected
 with the `tt(-m)' option.  The option `tt(-p)' is available whether or not
 the option is set.
 )
+pindex(TYPESET_TO_UNSET)
+pindex(NO_TYPESET_TO_UNSET)
+pindex(TYPESETTOUNSET)
+pindex(NOTYPESETTOUNSET)
+item(tt(TYPESET_TO_UNSET) <K> <S>)(
+When declaring a new parameter with any of the `tt(typeset)' family of
+related commands, the parameter remains unset unless and until a
+value is explicitly assigned to it, either in the `tt(typeset)' command
+itself or as a later assignment statement.
+)
 pindex(VERBOSE)
 pindex(NO_VERBOSE)
 pindex(NOVERBOSE)
diff --git a/Doc/Zsh/params.yo b/Doc/Zsh/params.yo
index 36c1ae4c2..a9044336f 100644
--- a/Doc/Zsh/params.yo
+++ b/Doc/Zsh/params.yo
@@ -393,6 +393,11 @@ is compared to the pattern, and the first matching key found is the
 result.  On failure substitutes the length of the array plus one, as
 discussed under the description of `tt(r)', or the empty string for an
 associative array.
+
+Note: Although `tt(i)' may be applied to a scalar substitution to find
+the offset of a substring, the results are likely to be misleading when
+searching within substitutions that yield an empty string, or when
+searching for the empty substring.
 )
 item(tt(I))(
 Like `tt(i)', but gives the index of the last match, or all possible
diff --git a/Src/builtin.c b/Src/builtin.c
index 26335a2e8..6d119f7a5 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -2491,6 +2491,8 @@ typeset_single(char *cname, char *pname, Param pm, UNUSED(int func),
 		return NULL;
 	    }
 	}
+	if (isset(TYPESETTOUNSET))
+	    pm->node.flags |= PM_DEFAULTED;
     } else {
 	if (idigit(*pname))
 	    zerrnam(cname, "not an identifier: %s", pname);
@@ -2836,7 +2838,7 @@ bin_typeset(char *name, char **argv, LinkList assigns, Options ops, int func)
 	    unqueue_signals();
 	    return 1;
 	} else if (pm) {
-	    if (!(pm->node.flags & PM_UNSET)
+	    if ((!(pm->node.flags & PM_UNSET) || pm->node.flags & PM_DECLARED)
 		&& (locallevel == pm->level || !(on & PM_LOCAL))) {
 		if (pm->node.flags & PM_TIED) {
 		    if (PM_TYPE(pm->node.flags) != PM_SCALAR) {
@@ -2889,6 +2891,8 @@ bin_typeset(char *name, char **argv, LinkList assigns, Options ops, int func)
 	 *
 	 * Don't attempt to set it yet, it's too early
 	 * to be exported properly.
+	 *
+	 * This may create the array with PM_DEFAULTED.
 	 */
 	asg2.name = asg->name;
 	asg2.flags = 0;
@@ -2930,8 +2934,12 @@ bin_typeset(char *name, char **argv, LinkList assigns, Options ops, int func)
 	if (asg->value.array) {
 	    int flags = (asg->flags & ASG_KEY_VALUE) ? ASSPM_KEY_VALUE : 0;
 	    assignaparam(asg->name, zlinklist2array(asg->value.array, 1), flags);
-	} else if (oldval)
-	    assignsparam(asg0.name, oldval, 0);
+	} else if (asg0.value.scalar || oldval) {
+	    /* We have to undo what we did wrong with asg2 */
+	    apm->node.flags &= ~PM_DEFAULTED;
+	    if (oldval)
+		assignsparam(asg0.name, oldval, 0);
+	}
 	unqueue_signals();
 
 	return 0;
diff --git a/Src/options.c b/Src/options.c
index 6ea6290e5..783022591 100644
--- a/Src/options.c
+++ b/Src/options.c
@@ -259,6 +259,7 @@ static struct optname optns[] = {
 {{NULL, "transientrprompt",   0},			 TRANSIENTRPROMPT},
 {{NULL, "trapsasync",	      0},			 TRAPSASYNC},
 {{NULL, "typesetsilent",      OPT_EMULATE|OPT_BOURNE},	 TYPESETSILENT},
+{{NULL, "typesettounset",     OPT_EMULATE|OPT_BOURNE},	 TYPESETTOUNSET},
 {{NULL, "unset",	      OPT_EMULATE|OPT_BSHELL},	 UNSET},
 {{NULL, "verbose",	      0},			 VERBOSE},
 {{NULL, "vi",		      0},			 VIMODE},
diff --git a/Src/params.c b/Src/params.c
index 122f5da7d..33bbc54f6 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -2093,7 +2093,8 @@ fetchvalue(Value v, char **pptr, int bracks, int flags)
 	if (sav)
 	    *s = sav;
 	*pptr = s;
-	if (!pm || (pm->node.flags & PM_UNSET))
+	if (!pm || ((pm->node.flags & PM_UNSET) &&
+		    !(pm->node.flags & PM_DECLARED)))
 	    return NULL;
 	if (v)
 	    memset(v, 0, sizeof(*v));
@@ -3055,6 +3056,7 @@ assignsparam(char *s, char *val, int flags)
 	     * Don't warn about anything.
 	     */
 	    flags &= ~ASSPM_WARN;
+	    v->pm->node.flags &= ~PM_DEFAULTED;
 	}
 	*ss = '[';
 	v = NULL;
@@ -3080,6 +3082,7 @@ assignsparam(char *s, char *val, int flags)
     }
     if (flags & ASSPM_WARN)
 	check_warn_pm(v->pm, "scalar", created, 1);
+    v->pm->node.flags &= ~PM_DEFAULTED;
     if (flags & ASSPM_AUGMENT) {
 	if (v->start == 0 && v->end == -1) {
 	    switch (PM_TYPE(v->pm->node.flags)) {
@@ -3232,6 +3235,7 @@ assignaparam(char *s, char **val, int flags)
 
     if (flags & ASSPM_WARN)
 	check_warn_pm(v->pm, "array", created, may_warn_about_nested_vars);
+    v->pm->node.flags &= ~PM_DEFAULTED;
 
     /*
      * At this point, we may have array entries consisting of
@@ -3444,6 +3448,7 @@ sethparam(char *s, char **val)
 	    return NULL;
 	}
     check_warn_pm(v->pm, "associative array", checkcreate, 1);
+    v->pm->node.flags &= ~PM_DEFAULTED;
     setarrvalue(v, val);
     unqueue_signals();
     return v->pm;
@@ -3515,6 +3520,7 @@ assignnparam(char *s, mnumber val, int flags)
 	if (flags & ASSPM_WARN)
 	    check_warn_pm(v->pm, "numeric", 0, 1);
     }
+    v->pm->node.flags &= ~PM_DEFAULTED;
     setnumvalue(v, val);
     unqueue_signals();
     return v->pm;
@@ -3619,6 +3625,7 @@ unsetparam_pm(Param pm, int altflag, int exp)
     else
 	altremove = NULL;
 
+    pm->node.flags &= ~PM_DECLARED;	/* like ksh, not like bash */
     if (!(pm->node.flags & PM_UNSET))
 	pm->gsu.s->unsetfn(pm, exp);
     if (pm->env)
@@ -3652,6 +3659,8 @@ unsetparam_pm(Param pm, int altflag, int exp)
 	}
 
 	zsfree(altremove);
+	if (!(pm->node.flags & PM_SPECIAL))
+	    pm->gsu.s = &stdscalar_gsu;
     }
 
     /*
@@ -4116,6 +4125,11 @@ tiedarrsetfn(Param pm, char *x)
 
     if (*dptr->arrptr)
 	freearray(*dptr->arrptr);
+    else if (pm->ename) {
+	Param altpm = (Param) paramtab->getnode(paramtab, pm->ename);
+	if (altpm)
+	    altpm->node.flags &= ~PM_DEFAULTED;
+    }
     if (x) {
 	char sepbuf[3];
 	if (imeta(dptr->joinchar))
@@ -5035,6 +5049,7 @@ arrfixenv(char *s, char **t)
 
     if (isset(ALLEXPORT))
 	pm->node.flags |= PM_EXPORTED;
+    pm->node.flags &= ~PM_DEFAULTED;
 
     /*
      * Do not "fix" parameters that were not exported
@@ -5839,8 +5854,9 @@ printparamnode(HashNode hn, int printflags)
     Param peer = NULL;
 
     if (p->node.flags & PM_UNSET) {
-	if (printflags & (PRINT_POSIX_READONLY|PRINT_POSIX_EXPORT) &&
-	    p->node.flags & (PM_READONLY|PM_EXPORTED)) {
+	if ((printflags & (PRINT_POSIX_READONLY|PRINT_POSIX_EXPORT) &&
+	     p->node.flags & (PM_READONLY|PM_EXPORTED)) ||
+	    (p->node.flags & PM_DEFAULTED) == PM_DEFAULTED) {
 	    /*
 	     * Special POSIX rules: show the parameter as readonly/exported
 	     * even though it's unset, but with no value.
diff --git a/Src/subst.c b/Src/subst.c
index 96e0914eb..9928be0e9 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -2563,7 +2563,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
 	     * Handle the (t) flag: value now becomes the type
 	     * information for the parameter.
 	     */
-	    if (v && v->pm && !(v->pm->node.flags & PM_UNSET)) {
+	    if (v && v->pm && ((v->pm->node.flags & PM_DECLARED) ||
+			       !(v->pm->node.flags & PM_UNSET))) {
 		int f = v->pm->node.flags;
 
 		switch (PM_TYPE(f)) {
diff --git a/Src/zsh.h b/Src/zsh.h
index d70a4017c..af9b4fb67 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -1929,8 +1929,10 @@ struct tieddata {
 				   made read-only by the user               */
 #define PM_READONLY_SPECIAL (PM_SPECIAL|PM_READONLY|PM_RO_BY_DESIGN)
 #define PM_DONTIMPORT	(1<<22)	/* do not import this variable              */
+#define PM_DECLARED	(1<<22) /* explicitly named with typeset            */
 #define PM_RESTRICTED	(1<<23) /* cannot be changed in restricted mode     */
 #define PM_UNSET	(1<<24)	/* has null value                           */
+#define PM_DEFAULTED	(PM_DECLARED|PM_UNSET)
 #define PM_REMOVABLE	(1<<25)	/* special can be removed from paramtab     */
 #define PM_AUTOLOAD	(1<<26) /* autoloaded from module                   */
 #define PM_NORESTORE	(1<<27)	/* do not restore value of local special    */
@@ -2536,6 +2538,7 @@ enum {
     TRANSIENTRPROMPT,
     TRAPSASYNC,
     TYPESETSILENT,
+    TYPESETTOUNSET,
     UNSET,
     VERBOSE,
     VIMODE,
diff --git a/Test/D06subscript.ztst b/Test/D06subscript.ztst
index c1a8d79cf..adbd398c4 100644
--- a/Test/D06subscript.ztst
+++ b/Test/D06subscript.ztst
@@ -289,3 +289,8 @@ F:Regression test for workers/42297
 >14 24
 >b b
 >b?rbaz foob?r
+
+  i=1,3
+  [[ ${a[$i]} = ${a[i]} ]]
+0f:Math evaluation of commas in array subscripts
+F:In math, (($i)) should be the same as ((i)), see workers/47748.
diff --git a/Test/E01options.ztst b/Test/E01options.ztst
index 415f46cd7..72749e6ab 100644
--- a/Test/E01options.ztst
+++ b/Test/E01options.ztst
@@ -1451,3 +1451,18 @@ F:If this test fails at the first unsetopt, refer to P01privileged.ztst.
 0q:RM_STAR_SILENT
 *>zsh: sure you want to delete all 15 files in ${PWD:h}/options.tmp \[yn\]\? ${BEL}(|n)
 *>zsh: sure you want to delete (all <->|more than <->) files in / \[yn\]\? ${BEL}(|n)
+
+  () {
+    local var
+    print ${(t)var}
+  }
+0:(t) returns correct type
+>scalar-local
+
+  () {
+    readonly var
+    typeset -p var
+  }
+0:readonly with typeset -p
+F:compare E03posix.ztst
+>typeset -r var=''
diff --git a/Test/E03posix.ztst b/Test/E03posix.ztst
new file mode 100644
index 000000000..7db4c0c84
--- /dev/null
+++ b/Test/E03posix.ztst
@@ -0,0 +1,163 @@
+# Test POSIX-specific behavior
+# Currently this covers only POSIXBUILTINS, other behaviors are in their
+# more directly related sections
+#
+
+%prep
+ setopt POSIX_BUILTINS TYPESET_TO_UNSET
+
+%test
+
+ local parentenv=preserved
+ fn() {
+  typeset -h +g -m \*
+  unset -m \*
+  integer i=9
+  float -H f=9
+  declare -t scalar
+  declare -H -a array
+  typeset
+  typeset +
+ }
+ fn
+ echo $parentenv
+0:Parameter hiding and tagging, printing types and values
+>array local array
+>float local f
+>integer local i=9
+>local tagged scalar
+>array local array
+>float local f
+>integer local i
+>local tagged scalar
+>preserved
+
+  readonly foo=bar novalue
+  readonly -p
+0:readonly -p output (no readonly specials)
+>readonly foo=bar
+>readonly novalue
+
+  local -a myarray
+  typeset -p1 myarray
+  myarray=("&" sand '""' "" plugh)
+  typeset -p1 myarray
+0:typeset -p1 output for array
+>typeset -a myarray
+>typeset -a myarray=(
+>  '&'
+>  sand
+>  '""'
+>  ''
+>  plugh
+>)
+
+  local -A myhash
+  typeset -p1 myhash
+  myhash=([one]=two [three]= [four]="[]")
+  typeset -p1 myhash
+0:typeset -p1 output for associative array
+>typeset -A myhash
+>typeset -A myhash=(
+>  [four]='[]'
+>  [one]=two
+>  [three]=''
+>)
+
+  str=s
+  arr=(a)
+  typeset -A ass
+  ass=(a a)
+  integer i=0
+  float f=0
+  print ${(t)str} ${(t)arr} ${(t)ass} ${(t)i} ${(t)f}
+0:${(t)...}
+>scalar array association-local integer-local float-local
+
+  print $empty[(i)] $empty[(I)]
+0:(i) and (I) return nothing for empty array
+>
+
+  (
+  # reserved words are handled during parsing,
+  # hence eval...
+  disable -r typeset
+  eval '
+  setopt kshtypeset
+  ktvars=(ktv1 ktv2)
+  typeset ktfoo=`echo arg1 arg2` $ktvars
+  () {
+    local ktfoo
+    print $+ktv1 $+ktv2 $+ktv3 $+ktfoo
+  }
+  print $ktfoo
+  unsetopt kshtypeset
+  typeset noktfoo=`echo noktarg1 noktarg2`
+  print $noktfoo
+  print $+noktarg1 $+noktarg2
+  unset ktfoo ktv1 ktv2 noktfoo noktarg2
+  '
+  )
+0:KSH_TYPESET option
+>0 0 0 0
+>arg1 arg2
+>noktarg1
+>0 0
+
+  () {
+    local var
+    print ${(t)var}
+  }
+0:(t) returns correct type
+>scalar-local
+
+  () {
+    readonly var
+    typeset -p var
+  }
+0:readonly with typeset -p
+>typeset -g -r var
+
+# Tests expected to fail
+
+  echo -
+0f:A single "-" for echo does not end the arguments
+F:POSIX requires a solitary "-" to be a plain argument
+>-
+
+  ARGV0=sh $ZTST_testdir/../Src/zsh -c 'foreach() { true; }'
+-f:"foreach" is not a reserved word
+
+  ARGV0=sh $ZTST_testdir/../Src/zsh -c 'end() { true; }
+-f:"end" is not a reserved word
+
+  a='a:b:' ARGV0=sh $ZTST_testdir/../Src/zsh -c 'IFS=:; printf "<%s>\n" $a'
+0f:IFS is a separator, not a delimiter
+><a>
+><b>
+
+  a=$'\ra\r\rb' ARGV0=sh $ZTST_testdir/../Src/zsh -c 'IFS=:; printf "<%s>\n" $a'
+0f:All whitespace characters are "IFS whitespace"
+F:isspace('\r') is true so \r should behave like space, \t, \n
+F:This may also need to apply to multibyte whitespace
+><a>
+><b>
+
+  ARGV0=sh $ZTST_testdir/../Src/zsh -c 'IFS=2; printf "<%s>\n" $((11*11))'
+0f:IFS applies to math results (numbers treated as strings)
+><1>
+><1>
+
+  ARGV0=sh $ZTST_testdir/../Src/zsh -c 'inf=42; echo $((inf))'
+0f:All identifiers are variable references in POSIX arithmetic
+F:POSIX has neither math functions nor floating point
+>42
+
+  ARGV0=sh $ZTST_testdir/../Src/zsh -c 'EUID=10; echo "$EUID"'
+-f:EUID is not a special variable
+>10
+
+  ARGV0=sh $ZTST_testdir/../Src/zsh -c "printf '<%10s>\n' St$'\M-C\M-)'phane"
+0f:Width of %s is computed in bytes not characters
+F:This is considered a bugfix in zsh
+><  Stéphane>
diff --git a/Test/V10private.ztst b/Test/V10private.ztst
index a3a63867b..03e8259d5 100644
--- a/Test/V10private.ztst
+++ b/Test/V10private.ztst
@@ -19,14 +19,14 @@
  () {
   print $scalar_test
   private scalar_test
-  print $+scalar_test
+  typeset +m scalar_test
   unset scalar_test
   print $+scalar_test
  }
  print $scalar_test
 0:basic scope hiding
 >toplevel
->1
+>local scalar_test
 >0
 >toplevel
 
@@ -45,14 +45,14 @@
  print $+unset_test
  () {
   private unset_test
-  print $+unset_test
+  typeset +m unset_test
   unset_test=setme
   print $unset_test
  }
  print $+unset_test
 0:variable defined only in scope
 >0
->1
+>local unset_test
 >setme
 >0
 
@@ -62,13 +62,13 @@
   local -Pa array_test=(in function)
   () {
    private array_test
-   print $+array_test
+   typeset +m array_test
   }
   print $array_test
  }
  print $array_test
 0:nested scope with different type, correctly restored
->1
+>local array_test
 >in function
 >top level
 
diff --git a/Test/runtests.zsh b/Test/runtests.zsh
index 562234d91..b66d579b6 100644
--- a/Test/runtests.zsh
+++ b/Test/runtests.zsh
@@ -7,7 +7,7 @@ emulate zsh
 # protect from catastrophic failure of an individual test.
 # We could probably do that with subshells instead.
 
-integer success failure skipped retval
+integer success=0 failure=0 skipped=0 retval
 for file in "${(f)ZTST_testlist}"; do
   $ZTST_exe +Z -f $ZTST_srcdir/ztst.zsh $file
   retval=$?
diff --git a/Test/ztst.zsh b/Test/ztst.zsh
index e668ae942..a59c06dcf 100755
--- a/Test/ztst.zsh
+++ b/Test/ztst.zsh
@@ -60,7 +60,7 @@ ZTST_mainopts=(${(kv)options})
 ZTST_testdir=$PWD
 ZTST_testname=$1
 
-integer ZTST_testfailed
+integer ZTST_testfailed=0
 
 # This is POSIX nonsense.  Because of the vague feeling someone, somewhere
 # may one day need to examine the arguments of "tail" using a standard


Messages sorted by: Reverse Date, Date, Thread, Author