Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: [BUG] zformat -f has no multibyte support



zsugabubus wrote on Mon, Dec 23, 2019 at 23:24:36 +0100:
>   $ setopt multibyte
>   $ zformat -f X '%-3s' 's:ő'; echo $X
>   "ő"
>   $ zformat -f X '%.1s' 's:ő'; echo $X
>   (garbage)
>   $ zformat -f X '%-3s' 's:o'; echo $X
>   "  o"

The printf builtin handles this correctly, so this should be fairly easy to fix.

Actually, I don't suppose we could just call into the printf code directly, could
we?  It _works_ (see attachment), but it's not elegant.

Aside: It is customary to use a valid from address.
diff --git a/Src/Modules/zutil.c b/Src/Modules/zutil.c
index 7d9bf05d6..bb00c8a24 100644
--- a/Src/Modules/zutil.c
+++ b/Src/Modules/zutil.c
@@ -775,7 +775,7 @@ static char *zformat_substring(char* instr, char **specs, char **outp,
 
     for (s = instr; *s && *s != endchar; s++) {
 	if (*s == '%') {
-	    int right, min = -1, max = -1, outl, testit;
+	    int right, min = -1, max = -1, testit;
 	    char *spec, *start = s;
 
 	    if ((right = (*++s == '-')))
@@ -835,11 +835,49 @@ static char *zformat_substring(char* instr, char **specs, char **outp,
 	    } else if (skip) {
 		continue;
 	    } else if ((spec = specs[STOUC(*s)])) {
-		int len;
+		int outl;
+		Param pm;
+		LinkList args = newlinklist();
+
+		/* '%', '-', min, '.', max, 's', NUL. */
+		char fmt[1 + 1 + DIGBUFSIZE-1 + 1 + DIGBUFSIZE-1 + 1 + 1];
+
+		/* zformat uses minus to mean "pad on the left".
+		 * printf uses minus to mean "pad on the right". */
+		const char *optional_minus = (right ? "" : "-");
+
+		startparamscope();
+		pm = createparam("REPLY", PM_LOCAL|PM_SCALAR);
+		if (pm)
+		    pm->level = locallevel; /* because createparam() doesn't */
+
+		addlinknode(args, "printf");
+		addlinknode(args, "-v");
+		addlinknode(args, "REPLY");
+		
+		if (min >= 0 && max >= 0) {
+		    snprintf(fmt, sizeof(fmt), "%%%s%d.%ds", optional_minus, min, max);
+		} else if (min >= 0) {
+		    snprintf(fmt, sizeof(fmt), "%%%s%ds", optional_minus, min);
+		} else if (max >= 0) {
+		    snprintf(fmt, sizeof(fmt), "%%.%ds", max);
+		} else {
+		    snprintf(fmt, sizeof(fmt), "%%%ss", optional_minus);
+		}
+		addlinknode(args, fmt);
 
-		if ((len = strlen(spec)) > max && max >= 0)
-		    len = max;
-		outl = (min >= 0 ? (min > len ? min : len) : len);
+		addlinknode(args, spec);
+
+		{
+		    Builtin builtin_printf =
+			(Builtin)builtintab->getnode(builtintab, "printf");
+		    local_list0(assigns);
+
+		    init_list0(assigns);
+		    execbuiltin(args, &assigns, builtin_printf);
+		}
+
+		outl = strlen(getsparam("REPLY"));
 
 		if (*ousedp + outl >= *olenp) {
 		    int nlen = *olenp + outl + 128;
@@ -849,24 +887,11 @@ static char *zformat_substring(char* instr, char **specs, char **outp,
 		    *olenp = nlen;
 		    *outp = tmp;
 		}
-		if (len >= outl) {
-		    memcpy(*outp + *ousedp, spec, outl);
+		{
+		    memcpy(*outp + *ousedp, getsparam("REPLY"), outl);
 		    *ousedp += outl;
-		} else {
-		    int diff = outl - len;
-
-		    if (right) {
-			while (diff--)
-			    (*outp)[(*ousedp)++] = ' ';
-			memcpy(*outp + *ousedp, spec, len);
-			*ousedp += len;
-		    } else {
-			memcpy(*outp + *ousedp, spec, len);
-			*ousedp += len;
-			while (diff--)
-			    (*outp)[(*ousedp)++] = ' ';
-		    }
 		}
+		endparamscope();
 	    } else {
 		int len = s - start + 1;
 
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index e20315340..3e7ec061f 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -585,3 +585,12 @@
 >OK
 F:A failure here may indicate the system regex library does not
 F:support character sets outside the portable 7-bit range.
+
+ if zmodload zsh/zutil 2>/dev/null; then
+   zformat -f REPLY '%.3s' 's:ヌxéfoo'
+   echo $REPLY
+ else
+   ZTST_skip="can't load the zsh/zutil module for testing"
+ fi
+0:zformat multibyte test
+>ヌxé
diff --git a/Test/V13zformat.ztst b/Test/V13zformat.ztst
index 982866e13..91901cbf4 100644
--- a/Test/V13zformat.ztst
+++ b/Test/V13zformat.ztst
@@ -65,3 +65,5 @@
 >ipsum.bar
 >bazbaz
 >\esc:ape
+
+# Multibyte tests in D07multibyte.ztst


Messages sorted by: Reverse Date, Date, Thread, Author