Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: The "set" utility outputs binary data



On Mon, 7 Dec 2015 10:13:08 -0800
Bart Schaefer <schaefer@xxxxxxxxxxxxxxxx> wrote:
> On Dec 7, 10:24am, Peter Stephenson wrote:
> }
> } (You can't seriously be complaining that "typeset -m IFS" now outputs
> } 
> } IFS=$' \t\n\C-@'
> } 
> } instead of raw binary, can you?)
> 
> Goodness, no.  Just the ${(V)...} substitution, mostly (the test cases
> 37335 updates) and anyplace where it's in human-informational output
> rather than machine-re-readable output.

This attempts to restore the short form when not called from
quotedzputs().  As a "free" bonus (that is, it's free to everyone else),
you can use ${(q+)...} to get the same effect as the new quoting within
parameters (so (V) does what it used to but (q+) gives you something
a bit similar but readbackinable).

I'll write some tests one day.

I suppose you'll be wanting it to work, next.

diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index 564c70d..c6e7b6f 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -1067,6 +1067,11 @@ If a tt(q-) is given (only a single tt(q) may appear), a minimal
 form of single quoting is used that only quotes the string if needed to
 protect special characters.  Typically this form gives the most readable
 output.
+
+If a tt(q+) is given, an extended form of minimal quoting is used that
+causes unprintable characters to be rendered using tt($')var(...)tt(').
+This quoting is similar to that used by the output of values by the
+tt(typeset) family of commands.
 )
 item(tt(Q))(
 Remove one level of quotes from the resulting words.
diff --git a/Src/subst.c b/Src/subst.c
index d9c9d24..bb1dd89 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -1887,12 +1887,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
 		    if (quotetype == QT_DOLLARS ||
 			quotetype == QT_BACKSLASH_PATTERN)
 			goto flagerr;
-		    if (s[1] == '-') {
+		    if (s[1] == '-' || s[1] == '+') {
 			if (quotemod)
 			    goto flagerr;
 			s++;
 			quotemod = 1;
-			quotetype = QT_SINGLE_OPTIONAL;
+			quotetype = (*s == '-') ? QT_SINGLE_OPTIONAL :
+			    QT_QUOTEDZPUTS;
 		    } else {
 			if (quotetype == QT_SINGLE_OPTIONAL) {
 			    /* extra q's after '-' not allowed */
@@ -3583,7 +3584,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
 	    ap = aval;
 
 	    if (quotemod > 0) {
-		if (quotetype > QT_BACKSLASH) {
+		if (quotetype == QT_QUOTEDZPUTS) {
+		    for (; *ap; ap++)
+			*ap = quotedzputs(*ap, NULL);
+		} else if (quotetype > QT_BACKSLASH) {
 		    int sl;
 		    char *tmp;
 
@@ -3626,7 +3630,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
 	    if (!copied)
 		val = dupstring(val), copied = 1;
 	    if (quotemod > 0) {
-		if (quotetype > QT_BACKSLASH) {
+		if (quotetype == QT_QUOTEDZPUTS) {
+		    val = quotedzputs(val, NULL);
+		} else if (quotetype > QT_BACKSLASH) {
 		    int sl;
 		    char *tmp;
 		    tmp = quotestring(val, NULL, quotetype);
diff --git a/Src/utils.c b/Src/utils.c
index fc2b192..1554fa0 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -387,6 +387,7 @@ putshout(int c)
     return 0;
 }
 
+#ifdef MULTIBYTE_SUPPORT
 /*
  * Turn a character into a visible representation thereof.  The visible
  * string is put together in a static buffer, and this function returns
@@ -409,6 +410,73 @@ putshout(int c)
 
 /**/
 mod_export char *
+nicechar_sel(int c, int quotable)
+{
+    static char buf[10];
+    char *s = buf;
+    c &= 0xff;
+    if (ZISPRINT(c))
+	goto done;
+    if (c & 0x80) {
+	if (isset(PRINTEIGHTBIT))
+	    goto done;
+	*s++ = '\\';
+	*s++ = 'M';
+	*s++ = '-';
+	c &= 0x7f;
+	if(ZISPRINT(c))
+	    goto done;
+    }
+    if (c == 0x7f) {
+	if (quotable) {
+	    *s++ = '\\';
+	    *s++ = 'C';
+	    *s++ = '-';
+	} else
+	    *s++ = '^';
+	c = '?';
+    } else if (c == '\n') {
+	*s++ = '\\';
+	c = 'n';
+    } else if (c == '\t') {
+	*s++ = '\\';
+	c = 't';
+    } else if (c < 0x20) {
+	if (quotable) {
+	    *s++ = '\\';
+	    *s++ = 'C';
+	    *s++ = '-';
+	} else
+	    *s++ = '^';
+	c += 0x40;
+    }
+    done:
+    /*
+     * The resulting string is still metafied, so check if
+     * we are returning a character in the range that needs metafication.
+     * This can't happen if the character is printed "nicely", so
+     * this results in a maximum of two bytes total (plus the null).
+     */
+    if (imeta(c)) {
+	*s++ = Meta;
+	*s++ = c ^ 32;
+    } else
+	*s++ = c;
+    *s = 0;
+    return buf;
+}
+
+/**/
+mod_export char *
+nicechar(int c)
+{
+    return nicechar_sel(c, 0);
+}
+
+#else /* MULTIBYTE_SUPPORT */
+
+/**/
+mod_export char *
 nicechar(int c)
 {
     static char buf[10];
@@ -459,6 +527,8 @@ nicechar(int c)
     return buf;
 }
 
+#endif /* MULTIBYTE_SUPPORT */
+
 /*
  * Return 1 if nicechar() would reformat this character.
  */
@@ -527,7 +597,7 @@ mb_charinit(void)
 
 /**/
 mod_export char *
-wcs_nicechar(wchar_t c, size_t *widthp, char **swidep)
+wcs_nicechar_sel(wchar_t c, size_t *widthp, char **swidep, int quotable)
 {
     static char *buf;
     static int bufalloc = 0, newalloc;
@@ -552,9 +622,12 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep)
     s = buf;
     if (!iswprint(c) && (c < 0x80 || !isset(PRINTEIGHTBIT))) {
 	if (c == 0x7f) {
-	    *s++ = '\\';
-	    *s++ = 'C';
-	    *s++ = '-';
+	    if (quotable) {
+		*s++ = '\\';
+		*s++ = 'C';
+		*s++ = '-';
+	    } else
+		*s++ = '^';
 	    c = '?';
 	} else if (c == L'\n') {
 	    *s++ = '\\';
@@ -563,9 +636,12 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep)
 	    *s++ = '\\';
 	    c = 't';
 	} else if (c < 0x20) {
-	    *s++ = '\\';
-	    *s++ = 'C';
-	    *s++ = '-';
+	    if (quotable) {
+		*s++ = '\\';
+		*s++ = 'C';
+		*s++ = '-';
+	    } else
+		*s++ = '^';
 	    c += 0x40;
 	} else if (c >= 0x80) {
 	    ret = -1;
@@ -635,6 +711,13 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep)
     return buf;
 }
 
+/**/
+mod_export char *
+wcs_nicechar(wchar_t c, size_t *widthp, char **swidep)
+{
+    return wcs_nicechar_sel(c, widthp, swidep, 0);
+}
+
 /*
  * Return 1 if wcs_nicechar() would reformat this character for display.
  */
@@ -4918,7 +5001,7 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int flags)
 	    /* FALL THROUGH */
 	case MB_INVALID:
 	    /* The byte didn't convert, so output it as a \M-... sequence. */
-	    fmt = nicechar(*ptr);
+	    fmt = nicechar_sel(*ptr, flags & NICEFLAG_QUOTE);
 	    newl = strlen(fmt);
 	    cnt = 1;
 	    /* Get mbs out of its undefined state. */
@@ -4933,7 +5016,7 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int flags)
 	    if (c == L'\'' && (flags & NICEFLAG_QUOTE))
 		fmt = "\\'";
 	    else
-		fmt = wcs_nicechar(c, &newl, NULL);
+		fmt = wcs_nicechar_sel(c, &newl, NULL, flags & NICEFLAG_QUOTE);
 	    break;
 	}
 
@@ -4967,8 +5050,13 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int flags)
     if (outstrp) {
 	*outptr = '\0';
 	/* Use more efficient storage for returned string */
-	*outstrp = (flags & NICEFLAG_HEAP) ? dupstring(outstr) : ztrdup(outstr);
-	free(outstr);
+	if (flags & NICEFLAG_NODUP)
+	    *outstrp = outstr;
+	else {
+	    *outstrp = (flags & NICEFLAG_HEAP) ? dupstring(outstr) :
+		ztrdup(outstr);
+	    free(outstr);
+	}
     }
 
     return l;
@@ -5834,38 +5922,76 @@ quotestring(const char *s, char **e, int instring)
     return v;
 }
 
-/* Unmetafy and output a string, quoted if it contains special characters. */
+/*
+ * Unmetafy and output a string, quoted if it contains special
+ * characters.
+ *
+ * If stream is NULL, return the same output with any allocation on the
+ * heap.
+ */
 
 /**/
-mod_export void
+mod_export char *
 quotedzputs(char const *s, FILE *stream)
 {
     int inquote = 0, c;
+    char *outstr, *ptr;
 
     /* check for empty string */
     if(!*s) {
+	if (!stream)
+	    return "''";
 	fputs("''", stream);
-	return;
+	return NULL;
     }
 
 #ifdef MULTIBYTE_SUPPORT
     if (is_mb_niceformat(s)) {
-	fputs("$'", stream);
-	mb_niceformat(s, stream, NULL, NICEFLAG_QUOTE);
-	fputc('\'', stream);
-	return;
+	if (stream) {
+	    fputs("$'", stream);
+	    mb_niceformat(s, stream, NULL, NICEFLAG_QUOTE);
+	    fputc('\'', stream);
+	    return NULL;
+	} else {
+	    char *substr;
+	    mb_niceformat(s, NULL, &substr, NICEFLAG_QUOTE|NICEFLAG_NODUP);
+	    outstr = (char *)zhalloc(4 + strlen(substr));
+	    sprintf(outstr, "$'%s'", substr);
+	    free(substr);
+	    return outstr;
+	}
     }
 #endif /* MULTIBYTE_SUPPORT */
 
     if (!hasspecial(s)) {
-	zputs(s, stream);
-	return;
+	if (stream) {
+	    zputs(s, stream);
+	    return NULL;
+	} else {
+	    return dupstring(s);
+	}
     }
 
+    if (!stream) {
+	const char *cptr;
+	int l = strlen(s) + 2;
+	for (cptr = s; *cptr; cptr++) {
+	    if (*cptr == Meta)
+		cptr++;
+	    else if (*cptr == '\'')
+		l += isset(RCQUOTES) ? 1 : 3;
+	}
+	ptr = outstr = zhalloc(l + 1);
+    } else {
+	ptr = outstr = NULL;
+    }
     if (isset(RCQUOTES)) {
 	/* use rc-style quotes-within-quotes for the whole string */
-	if(fputc('\'', stream) < 0)
-	    return;
+	if (stream) {
+	    if (fputc('\'', stream) < 0)
+		return NULL;
+	} else
+	    *ptr++ = '\'';
 	while(*s) {
 	    if (*s == Meta)
 		c = *++s ^ 32;
@@ -5873,52 +5999,98 @@ quotedzputs(char const *s, FILE *stream)
 		c = *s;
 	    s++;
 	    if (c == '\'') {
-		if(fputc('\'', stream) < 0)
-		    return;
-	    } else if(c == '\n' && isset(CSHJUNKIEQUOTES)) {
-		if(fputc('\\', stream) < 0)
-		    return;
+		if (stream) {
+		    if (fputc('\'', stream) < 0)
+			return NULL;
+		} else
+		    *ptr++ = '\'';
+	    } else if (c == '\n' && isset(CSHJUNKIEQUOTES)) {
+		if (stream) {
+		    if (fputc('\\', stream) < 0)
+			return NULL;
+		} else
+		    *ptr++ = '\\';
+	    }
+	    if (stream) {
+		if (fputc(c, stream) < 0)
+		    return NULL;
+	    } else {
+		if (imeta(c)) {
+		    *ptr++ = Meta;
+		    *ptr++ = c ^ 32;
+		} else
+		    *ptr++ = c;
 	    }
-	    if(fputc(c, stream) < 0)
-		return;
 	}
-	if(fputc('\'', stream) < 0)
-	    return;
+	if (stream) {
+	    if (fputc('\'', stream) < 0)
+		return NULL;
+	} else
+	    *ptr++ = '\'';
     } else {
 	/* use Bourne-style quoting, avoiding empty quoted strings */
-	while(*s) {
+	while (*s) {
 	    if (*s == Meta)
 		c = *++s ^ 32;
 	    else
 		c = *s;
 	    s++;
 	    if (c == '\'') {
-		if(inquote) {
-		    if(fputc('\'', stream) < 0)
-			return;
+		if (inquote) {
+		    if (stream) {
+			if (putc('\'', stream) < 0)
+			    return NULL;
+		    } else
+			*ptr++ = '\'';
 		    inquote=0;
 		}
-		if(fputs("\\'", stream) < 0)
-		    return;
+		if (stream) {
+		    if (fputs("\\'", stream) < 0)
+			return NULL;
+		} else {
+		    *ptr++ = '\\';
+		    *ptr++ = '\'';
+		}
 	    } else {
 		if (!inquote) {
-		    if(fputc('\'', stream) < 0)
-			return;
+		    if (stream) {
+			if (fputc('\'', stream) < 0)
+			    return NULL;
+		    } else
+			*ptr++ = '\'';
 		    inquote=1;
 		}
-		if(c == '\n' && isset(CSHJUNKIEQUOTES)) {
-		    if(fputc('\\', stream) < 0)
-			return;
+		if (c == '\n' && isset(CSHJUNKIEQUOTES)) {
+		    if (stream) {
+			if (fputc('\\', stream) < 0)
+			    return NULL;
+		    } else
+			*ptr++ = '\\';
+		}
+		if (stream) {
+		    if (fputc(c, stream) < 0)
+			return NULL;
+		} else {
+		    if (imeta(c)) {
+			*ptr++ = Meta;
+			*ptr++ = c ^ 32;
+		    } else
+			*ptr++ = c;
 		}
-		if(fputc(c, stream) < 0)
-		    return;
 	    }
 	}
 	if (inquote) {
-	    if(fputc('\'', stream) < 0)
-		return;
+	    if (stream) {
+		if (fputc('\'', stream) < 0)
+		    return NULL;
+	    } else
+		*ptr++ = '\'';
 	}
     }
+    if (!stream)
+	*ptr++ = '\0';
+
+    return outstr;
 }
 
 /* Double-quote a metafied string. */
diff --git a/Src/zsh.h b/Src/zsh.h
index caf7def..0302d68 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -272,7 +272,12 @@ enum {
     /*
      * As QT_BACKSLASH, but a NULL string is shown as ''.
      */
-    QT_BACKSLASH_SHOWNULL
+    QT_BACKSLASH_SHOWNULL,
+    /*
+     * Quoting as produced by quotedzputs(), used for human
+     * readability of parameter values.
+     */
+    QT_QUOTEDZPUTS
 };
 
 #define QT_IS_SINGLE(x)	((x) == QT_SINGLE || (x) == QT_SINGLE_OPTIONAL)
@@ -3055,6 +3060,7 @@ enum {
 enum {
     NICEFLAG_HEAP = 1,		/* Heap allocation where needed */
     NICEFLAG_QUOTE = 2,		/* Result will appear in $'...' */
+    NICEFLAG_NODUP = 4,         /* Leave allocated */
 };
 
 /* Metafied input */
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index 2b46e06..1460ff6 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -398,7 +398,7 @@
   foo=$'\x7f\x00'
   print -r -- ${(V)foo}
 0:${(V)...}
->\C-?\C-@
+>^?^@
 
   foo='playing '\''stupid'\'' "games" \w\i\t\h $quoting.'
   print -r ${(q)foo}
diff --git a/Test/V09datetime.ztst b/Test/V09datetime.ztst
index 831421d..7905155 100644
--- a/Test/V09datetime.ztst
+++ b/Test/V09datetime.ztst
@@ -71,4 +71,4 @@
 
   print -r -- ${(V)"$(strftime $'%Y\0%m\0%d' 100000000)"}
 0:Embedded nulls
->1973\C-@03\C-@03
+>1973^@03^@03



Messages sorted by: Reverse Date, Date, Thread, Author