Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: The "set" utility outputs binary data



On Fri, 04 Dec 2015 14:29:00 +0000
Peter Stephenson <p.stephenson@xxxxxxxxxxx> wrote:
> It looks like the strategy would be to upgrade quotedzputs() to
> interact better with nicezputs() and nicechar().  The code that's not
> there at the moment is to pick the right sort of quotes, and you only
> know that after the event at the moment, so the interface to those two
> needs expanding.
> 
> I'd propose not bothering to do this in the case where multibyte mode
> isn't available (i.e. is not even compiled in).  It's not useful enough
> and wouldn't get much testing.

This seems to be going the right way; let me know of any oddities or
unwanted side effects.  Note a few "nice" representations have changed
to fit $'..' conventions.

pws

diff --git a/Src/utils.c b/Src/utils.c
index ca810de..d131383 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -411,7 +411,7 @@ putshout(int c)
 mod_export char *
 nicechar(int c)
 {
-    static char buf[6];
+    static char buf[10];
     char *s = buf;
     c &= 0xff;
     if (ZISPRINT(c))
@@ -427,7 +427,9 @@ nicechar(int c)
 	    goto done;
     }
     if (c == 0x7f) {
-	*s++ = '^';
+	*s++ = '\\';
+	*s++ = 'C';
+	*s++ = '-';
 	c = '?';
     } else if (c == '\n') {
 	*s++ = '\\';
@@ -436,7 +438,9 @@ nicechar(int c)
 	*s++ = '\\';
 	c = 't';
     } else if (c < 0x20) {
-	*s++ = '^';
+	*s++ = '\\';
+	*s++ = 'C';
+	*s++ = '-';
 	c += 0x40;
     }
     done:
@@ -455,6 +459,22 @@ nicechar(int c)
     return buf;
 }
 
+/*
+ * Return 1 if nicechar() would reformat this character.
+ */
+
+/**/
+mod_export int
+is_nicechar(int c)
+{
+    c &= 0xff;
+    if (ZISPRINT(c))
+	return 0;
+    if (c & 0x80)
+	return !isset(PRINTEIGHTBIT);
+    return (c == 0x7f || c == '\n' || c == '\t' || c < 0x20);
+}
+
 /**/
 #ifdef MULTIBYTE_SUPPORT
 static mbstate_t mb_shiftstate;
@@ -532,7 +552,9 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep)
     s = buf;
     if (!iswprint(c) && (c < 0x80 || !isset(PRINTEIGHTBIT))) {
 	if (c == 0x7f) {
-	    *s++ = '^';
+	    *s++ = '\\';
+	    *s++ = 'C';
+	    *s++ = '-';
 	    c = '?';
 	} else if (c == L'\n') {
 	    *s++ = '\\';
@@ -541,7 +563,9 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep)
 	    *s++ = '\\';
 	    c = 't';
 	} else if (c < 0x20) {
-	    *s++ = '^';
+	    *s++ = '\\';
+	    *s++ = 'C';
+	    *s++ = '-';
 	    c += 0x40;
 	} else if (c >= 0x80) {
 	    ret = -1;
@@ -611,6 +635,23 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep)
     return buf;
 }
 
+/*
+ * Return 1 if wcs_nicechar() would reformat this character for display.
+ */
+
+/**/
+mod_export int is_wcs_nicechar(wchar_t c)
+{
+    if (!iswprint(c) && (c < 0x80 || !isset(PRINTEIGHTBIT))) {
+	if (c == 0x7f || c == L'\n' || c == L'\t' || c < 0x20)
+	    return 1;
+	if (c >= 0x80) {
+	    return (c >= 0x100);
+	}
+    }
+    return 0;
+}
+
 /**/
 mod_export int
 zwcwidth(wint_t wc)
@@ -4834,12 +4875,15 @@ niceztrlen(char const *s)
  * If outstrp is not NULL, set *outstrp to a zalloc'd version of
  * the output (still metafied).
  *
- * If "heap" is non-zero, use the heap for *outstrp, else zalloc.
+ * If flags contains NICEFLAG_HEAP, use the heap for *outstrp, else
+ * zalloc.
+ * If flags contains NICEFLAG_QUOTE, the output is going to be within
+ * $'...', so quote "'" with a backslash.
  */
 
 /**/
 mod_export size_t
-mb_niceformat(const char *s, FILE *stream, char **outstrp, int heap)
+mb_niceformat(const char *s, FILE *stream, char **outstrp, int flags)
 {
     size_t l = 0, newl;
     int umlen, outalloc, outleft, eol = 0;
@@ -4886,7 +4930,10 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int heap)
 	    cnt = 1;
 	    /* FALL THROUGH */
 	default:
-	    fmt = wcs_nicechar(c, &newl, NULL);
+	    if (c == L'\'' && (flags & NICEFLAG_QUOTE))
+		fmt = "\\'";
+	    else
+		fmt = wcs_nicechar(c, &newl, NULL);
 	    break;
 	}
 
@@ -4920,13 +4967,71 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int heap)
     if (outstrp) {
 	*outptr = '\0';
 	/* Use more efficient storage for returned string */
-	*outstrp = heap ? dupstring(outstr) : ztrdup(outstr);
+	*outstrp = (flags & NICEFLAG_HEAP) ? dupstring(outstr) : ztrdup(outstr);
 	free(outstr);
     }
 
     return l;
 }
 
+/*
+ * Return 1 if mb_niceformat() would reformat this string, else 0.
+ */
+
+/**/
+mod_export int
+is_mb_niceformat(const char *s)
+{
+    int umlen, eol = 0, ret = 0;
+    wchar_t c;
+    char *ums, *ptr;
+    mbstate_t mbs;
+
+    ums = ztrdup(s);
+    untokenize(ums);
+    ptr = unmetafy(ums, &umlen);
+
+    memset(&mbs, 0, sizeof mbs);
+    while (umlen > 0) {
+	size_t cnt = eol ? MB_INVALID : mbrtowc(&c, ptr, umlen, &mbs);
+
+	switch (cnt) {
+	case MB_INCOMPLETE:
+	    eol = 1;
+	    /* FALL THROUGH */
+	case MB_INVALID:
+	    /* The byte didn't convert; see if nicechar() would reformat it. */
+	    if (is_nicechar(*ptr))  {
+		ret = 1;
+		break;
+	    }
+	    cnt = 1;
+	    /* Get mbs out of its undefined state. */
+	    memset(&mbs, 0, sizeof mbs);
+	    break;
+	case 0:
+	    /* Careful:  converting '\0' returns 0, but a '\0' is a
+	     * real character for us, so we should consume 1 byte. */
+	    cnt = 1;
+	    /* FALL THROUGH */
+	default:
+	    if (is_wcs_nicechar(c))
+		ret = 1;
+	    break;
+	}
+
+	if (ret)
+	    break;
+
+	umlen -= cnt;
+	ptr += cnt;
+    }
+
+    free(ums);
+
+    return ret;
+}
+
 /* ztrdup multibyte string with nice formatting */
 
 /**/
@@ -4935,7 +5040,7 @@ nicedup(const char *s, int heap)
 {
     char *retstr;
 
-    (void)mb_niceformat(s, NULL, &retstr, heap);
+    (void)mb_niceformat(s, NULL, &retstr, heap ? NICEFLAG_HEAP : 0);
 
     return retstr;
 }
@@ -5717,22 +5822,35 @@ quotestring(const char *s, char **e, int instring)
 /* Unmetafy and output a string, quoted if it contains special characters. */
 
 /**/
-mod_export int
+mod_export void
 quotedzputs(char const *s, FILE *stream)
 {
     int inquote = 0, c;
 
     /* check for empty string */
-    if(!*s)
-	return fputs("''", stream);
+    if(!*s) {
+	fputs("''", stream);
+	return;
+    }
 
-    if (!hasspecial(s))
-	return zputs(s, stream);
+#ifdef MULTIBYTE_SUPPORT
+    if (is_mb_niceformat(s)) {
+	fputs("$'", stream);
+	mb_niceformat(s, stream, NULL, NICEFLAG_QUOTE);
+	fputc('\'', stream);
+	return;
+    }
+#endif /* MULTIBYTE_SUPPORT */
+
+    if (!hasspecial(s)) {
+	zputs(s, stream);
+	return;
+    }
 
     if (isset(RCQUOTES)) {
 	/* use rc-style quotes-within-quotes for the whole string */
 	if(fputc('\'', stream) < 0)
-	    return EOF;
+	    return;
 	while(*s) {
 	    if (*s == Meta)
 		c = *++s ^ 32;
@@ -5741,16 +5859,16 @@ quotedzputs(char const *s, FILE *stream)
 	    s++;
 	    if (c == '\'') {
 		if(fputc('\'', stream) < 0)
-		    return EOF;
+		    return;
 	    } else if(c == '\n' && isset(CSHJUNKIEQUOTES)) {
 		if(fputc('\\', stream) < 0)
-		    return EOF;
+		    return;
 	    }
 	    if(fputc(c, stream) < 0)
-		return EOF;
+		return;
 	}
 	if(fputc('\'', stream) < 0)
-	    return EOF;
+	    return;
     } else {
 	/* use Bourne-style quoting, avoiding empty quoted strings */
 	while(*s) {
@@ -5762,31 +5880,30 @@ quotedzputs(char const *s, FILE *stream)
 	    if (c == '\'') {
 		if(inquote) {
 		    if(fputc('\'', stream) < 0)
-			return EOF;
+			return;
 		    inquote=0;
 		}
 		if(fputs("\\'", stream) < 0)
-		    return EOF;
+		    return;
 	    } else {
 		if (!inquote) {
 		    if(fputc('\'', stream) < 0)
-			return EOF;
+			return;
 		    inquote=1;
 		}
 		if(c == '\n' && isset(CSHJUNKIEQUOTES)) {
 		    if(fputc('\\', stream) < 0)
-			return EOF;
+			return;
 		}
 		if(fputc(c, stream) < 0)
-		    return EOF;
+		    return;
 	    }
 	}
 	if (inquote) {
 	    if(fputc('\'', stream) < 0)
-		return EOF;
+		return;
 	}
     }
-    return 0;
 }
 
 /* Double-quote a metafied string. */
diff --git a/Src/zsh.h b/Src/zsh.h
index d3bfcef..caf7def 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -3051,6 +3051,12 @@ enum {
 #define AFTERTRAPHOOK  (zshhooks + 2)
 
 #ifdef MULTIBYTE_SUPPORT
+/* Final argument to mb_niceformat() */
+enum {
+    NICEFLAG_HEAP = 1,		/* Heap allocation where needed */
+    NICEFLAG_QUOTE = 2,		/* Result will appear in $'...' */
+};
+
 /* Metafied input */
 #define nicezputs(str, outs)	(void)mb_niceformat((str), (outs), NULL, 0)
 #define MB_METACHARINIT()	mb_charinit()



Messages sorted by: Reverse Date, Date, Thread, Author