Zsh Mailing List Archive
Messages sorted by:
Reverse Date,
Date,
Thread,
Author
Re: shwordsplit: final non-whitespace IFS character problem
- X-seq: zsh-workers 41499
- From: Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx>
- To: Zsh hackers list <zsh-workers@xxxxxxx>
- Subject: Re: shwordsplit: final non-whitespace IFS character problem
- Date: Sun, 6 Aug 2017 21:01:10 +0100
- Dkim-signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=ntlworld.com; s=meg.feb2017; t=1502049670; bh=tkwi2zyJE0keT8Sl2IrVZZMFgGLX92DxGdqVJteQg8g=; h=Date:From:To:Subject:In-Reply-To:References; b=Nsjd8YFjKZ0FbOFayAXJX35rBbQN13/Wzqyiktdckty019c7fyUaAvD0BYQ8288tu WYL4YSj4apu4ibSqUzQ3OlzTGES10bJdakjPkAuzcmJIYYkS+4uLJKdWak+joNWMgz WdA1ciZ6YQIASs9IoplyI2RzEj3H7JmMvf/ZGR2s0Bh3bhIQfojP49TfuhwnVEd2A6 zqgtSdbiiNNBcG9VgAiushd+3U1EEP4J11W5ILV5Qjc1uQ9dc1NR+szR7uKSTd/deY xR1PMS1b4/NcKTwKBU62a4I5G+yxtc8W3PwZGfjMrmZCMDbNFm68cpKT46mpFWlN5R DlE9tjXwAFIdg==
- In-reply-to: <20170806190834.5073e18a@ntlworld.com>
- List-help: <mailto:zsh-workers-help@zsh.org>
- List-id: Zsh Workers List <zsh-workers.zsh.org>
- List-post: <mailto:zsh-workers@zsh.org>
- Mailing-list: contact zsh-workers-help@xxxxxxx; run by ezmlm
- References: <0f71b764-cc3d-5274-a16a-498b792bff6e@inlv.org> <20170806190834.5073e18a@ntlworld.com>
This uses some rephrasings from Daniel and also fails to leak in the
case of non-heap allocation.
pws
diff --git a/Doc/Zsh/options.yo b/Doc/Zsh/options.yo
index 70092d6..36bd939 100644
--- a/Doc/Zsh/options.yo
+++ b/Doc/Zsh/options.yo
@@ -2193,16 +2193,16 @@ cindex(discarding embedded nulls in $'...')
cindex(embedded nulls, in $'...')
cindex(nulls, embedded in $'...')
item(tt(POSIX_STRINGS) <K> <S>)(
-This option affects processing of quoted strings. Currently it only
-affects the behaviour of null characters, i.e. character 0 in the
-portable character set corresponding to US ASCII.
+This option affects processing of quoted strings, and also
+splitting of strings.
-When this option is not set, null characters embedded within strings
-of the form tt($')var(...)tt(') are treated as ordinary characters. The
-entire string is maintained within the shell and output to files where
-necessary, although owing to restrictions of the library interface
-the string is truncated at the null character in file names, environment
-variables, or in arguments to external programs.
+When this option is not set, null characters (character 0 in the
+portable character set corresponding to US ASCII) that are embedded
+within strings of the form tt($')var(...)tt(') are treated as ordinary
+characters. The entire string is maintained within the shell and output
+to files where necessary, although owing to restrictions of the library
+interface the string is truncated at the null character in file names,
+environment variables, or in arguments to external programs.
When this option is set, the tt($')var(...)tt(') expression is truncated at
the null character. Note that remaining parts of the same string
@@ -2211,6 +2211,18 @@ beyond the termination of the quotes are not truncated.
For example, the command line argument tt(a$'b\0c'd) is treated with
the option off as the characters tt(a), tt(b), null, tt(c), tt(d),
and with the option on as the characters tt(a), tt(b), tt(d).
+
+Furthermore, when the option is set, a trailing separator followed by an
+empty string does not cause extra fields to be produced when the string
+is split. For example,
+
+example(var="foo bar "
+print -l "${=var}")
+
+outputs a blank line at the end if tt(POSIX_STRINGS) is not set, but
+no blank line if the option is set. Note that empty elements would in
+any case be removed if quotation marks were not used. If the separator
+is not white space, only the final separator is ignored in this fashion.
)
pindex(POSIX_TRAPS)
pindex(NO_POSIX_TRAPS)
diff --git a/Src/utils.c b/Src/utils.c
index 5055d69..7d8e98c 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -3500,12 +3500,12 @@ skipwsep(char **s)
mod_export char **
spacesplit(char *s, int allownull, int heap, int quote)
{
- char *t, **ret, **ptr;
+ char *t, **ret, **ptr, **eptr;
int l = sizeof(*ret) * (wordcount(s, NULL, -!allownull) + 1);
char *(*dup)(const char *) = (heap ? dupstring : ztrdup);
/* ### TODO: s/calloc/alloc/ */
- ptr = ret = (char **) (heap ? hcalloc(l) : zshcalloc(l));
+ eptr = ptr = ret = (char **) (heap ? hcalloc(l) : zshcalloc(l));
if (quote) {
/*
@@ -3537,6 +3537,7 @@ spacesplit(char *s, int allownull, int heap, int quote)
if (s > t || allownull) {
*ptr = (char *) (heap ? zhalloc((s - t) + 1) :
zalloc((s - t) + 1));
+ eptr = ptr;
ztrncpy(*ptr++, t, s - t);
} else
*ptr++ = dup(nulstring);
@@ -3545,6 +3546,21 @@ spacesplit(char *s, int allownull, int heap, int quote)
}
if (!allownull && t != s)
*ptr++ = dup("");
+ if (isset(POSIXSTRINGS) && ptr != eptr + 1) {
+ /*
+ * Trailing separators do not generate extra fields in POSIX.
+ * Note this is only the final separator --- if the
+ * immediately preceding field was null it is still counted.
+ * So just back up one.
+ */
+ --ptr;
+ if (!heap) {
+ char **ret2 = realloc(ret, sizeof(*ret) * (ptr+1-ret));
+ ptr -= ret-ret2;
+ free(ret);
+ ret = ret2;
+ }
+ }
*ptr = NULL;
return ret;
}
diff --git a/Test/E01options.ztst b/Test/E01options.ztst
index f01d835..b394e7c 100644
--- a/Test/E01options.ztst
+++ b/Test/E01options.ztst
@@ -1339,3 +1339,44 @@
?(anon):4: `break' active at end of function scope
?(anon):4: `break' active at end of function scope
?(anon):4: `break' active at end of function scope
+
+ for opt in POSIX_STRINGS NO_POSIX_STRINGS; do
+ var="foo bar "
+ (setopt $opt; print -l X "${=var}" Y)
+ var="foo2::bar2:"
+ (setopt $opt; IFS=:; print -l X "${=var}" Y)
+ var="foo3:bar3::"
+ (setopt $opt; IFS=:; print -l X "${=var}" Y)
+ done
+0:POSIX_STRINGS effect on final delimiters
+>X
+>foo
+>bar
+>Y
+>X
+>foo2
+>
+>bar2
+>Y
+>X
+>foo3
+>bar3
+>
+>Y
+>X
+>foo
+>bar
+>
+>Y
+>X
+>foo2
+>
+>bar2
+>
+>Y
+>X
+>foo3
+>bar3
+>
+>
+>Y
Messages sorted by:
Reverse Date,
Date,
Thread,
Author