Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: Core dump with latest CVS



Peter Stephenson wrote:
> \b used to be treated as length 1; now we take its width.  For control
> characters currently we assume width 1.  This causes the padding width
                                       ^ that should have been 0.
> to be zero, which causes the shell to crash: I can fix the crash easily.
> 
> - Mostly the problem is just control characters.  I noted one before.
> We could assume these have length 1 rather than 0 (and document this,
> obviously).  It's not particularly logical but it fixes up most of the
> problem cases without any need for tricks involving multibyte mode or
> new features.  Most users probably won't be (directly) affected anyway.

I'll commit this pair of changes for now, since it fixes the crash and
mostly does the right thing transparently---ASCII characters behave as
they used to and multibyte characters as they should---but if there are
strong views I can change it.

I've tried to be consistent with the assumed width of control
characters; in most other circumstances there's no simple answer that
will do the right thing, so it might as well be 1 as zero.  We already
tend to use nicechar() anywhere the problem is significant.

Index: README
===================================================================
RCS file: /cvsroot/zsh/zsh/README,v
retrieving revision 1.37
diff -u -r1.37 README
--- README	10 Sep 2006 15:24:26 -0000	1.37
+++ README	15 Sep 2006 13:00:20 -0000
@@ -49,6 +49,13 @@
 other shell at startup; it must be present in the environment or set
 subsequently by the user.  It is valid for the variable to be unset.
 
+The MULTIBYTE option is on by default where it is available; this
+causes many operations to recognise characters as in the current locale.
+Older versions of the shell always assumed a character was one byte.
+In some places the width of the character will be used; this is transparent
+when used for calculations of screen position, but also occurs, for
+example, in calculations of padding width.
+
 Zsh has previously been lax about whether it allows octets with the
 top bit set to be part of a shell identifier.  Older versions of the shell
 assumed all such octets were allowed in identifiers, however the POSIX
Index: Doc/Zsh/expn.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/expn.yo,v
retrieving revision 1.69
diff -u -r1.69 expn.yo
--- Doc/Zsh/expn.yo	14 Sep 2006 09:03:16 -0000	1.69
+++ Doc/Zsh/expn.yo	15 Sep 2006 13:00:22 -0000
@@ -871,7 +871,9 @@
 
 If the tt(MULTIBYTE) option is in effect, screen character widths will
 be used for the calculation of padding; otherwise individual bytes are
-treat as occupying one unit of width.
+treat as occupying one unit of width.  Control characters are always
+assumed to be one unit wide; this allows the mechanism to be used
+for generating repetitions of control characters.
 )
 item(tt(r:)var(expr)tt(::)var(string1)tt(::)var(string2)tt(:))(
 As tt(l), but pad the words on the right and insert var(string2)
Index: Src/prompt.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/prompt.c,v
retrieving revision 1.40
diff -u -r1.40 prompt.c
--- Src/prompt.c	13 Sep 2006 20:55:30 -0000	1.40
+++ Src/prompt.c	15 Sep 2006 13:00:25 -0000
@@ -944,10 +944,15 @@
 		multi = 0;
 		break;
 	    default:
-		/* If the character isn't printable, wcwidth() returns -1. */
+		/*
+		 * If the character isn't printable, wcwidth() returns
+		 * -1.  We assume width 1.
+		 */
 		wcw = wcwidth(wc);
-		if (wcw > 0)
+		if (wcw >= 0)
 		    w += wcw;
+		else
+		    w++;
 		multi = 0;
 		break;
 	    }
@@ -1152,8 +1157,10 @@
 				break;
 			    default:
 				wcw = wcwidth(cc);
-				if (wcw > 0)
+				if (wcw >= 0)
 				    remw -= wcw;
+				else
+				    remw--;
 				break;
 			    }
 #else
@@ -1215,8 +1222,10 @@
 				break;
 			    default:
 				wcw = wcwidth(cc);
-				if (wcw > 0)
+				if (wcw >= 0)
 				    maxwidth -= wcw;
+				else
+				    maxwidth--;
 				break;
 			    }
 #else
Index: Src/subst.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/subst.c,v
retrieving revision 1.59
diff -u -r1.59 subst.c
--- Src/subst.c	13 Sep 2006 20:55:30 -0000	1.59
+++ Src/subst.c	15 Sep 2006 13:00:27 -0000
@@ -837,29 +837,31 @@
 		    }
 		} else {
 		    f -= lpreone;
-		    if ((m = f % lpremul)) {
-			/*
-			 * Left over fraction of repeated string.
-			 */
-			MB_METACHARINIT();
-			/* Skip this much. */
-			m = lpremul - m;
-			for (t = premul; m > 0; ) {
-			    t += MB_METACHARLENCONV(t, &cchar);
-			    m -= WCWIDTH(cchar);
-			}
-			/* Output the rest. */
-			while (*t)
-			    *r++ = *t++;
-		    }
-		    for (cc = f / lpremul; cc--;) {
-			/* Repeat the repeated string */
-			MB_METACHARINIT();
-			for (c = lpremul, t = premul; c > 0; ) {
-			    cl = MB_METACHARLENCONV(t, &cchar);
-			    while (cl--)
+		    if (lpremul) {
+			if ((m = f % lpremul)) {
+			    /*
+			     * Left over fraction of repeated string.
+			     */
+			    MB_METACHARINIT();
+			    /* Skip this much. */
+			    m = lpremul - m;
+			    for (t = premul; m > 0; ) {
+				t += MB_METACHARLENCONV(t, &cchar);
+				m -= WCWIDTH(cchar);
+			    }
+			    /* Output the rest. */
+			    while (*t)
 				*r++ = *t++;
-			    c -= WCWIDTH(cchar);
+			}
+			for (cc = f / lpremul; cc--;) {
+			    /* Repeat the repeated string */
+			    MB_METACHARINIT();
+			    for (c = lpremul, t = premul; c > 0; ) {
+				cl = MB_METACHARLENCONV(t, &cchar);
+				while (cl--)
+				    *r++ = *t++;
+				c -= WCWIDTH(cchar);
+			    }
 			}
 		    }
 		    if (preone) {
@@ -910,19 +912,21 @@
 			while (*postone)
 			    *r++ = *postone++;
 		    }
-		    for (cc = f / lpostmul; cc--;) {
-			/* Begin the beguine */
-			for (t = postmul; *t; )
-			    *r++ = *t++;
-		    }
-		    if ((m = f % lpostmul)) {
-			/* Fill leftovers with chunk of repeated string */
-			MB_METACHARINIT();
-			while (m > 0) {
-			    cl = MB_METACHARLENCONV(postmul, &cchar);
-			    m -= WCWIDTH(cchar);
-			    while (cl--)
-				*r++ = *postmul++;
+		    if (lpostmul) {
+			for (cc = f / lpostmul; cc--;) {
+			    /* Begin the beguine */
+			    for (t = postmul; *t; )
+				*r++ = *t++;
+			}
+			if ((m = f % lpostmul)) {
+			    /* Fill leftovers with chunk of repeated string */
+			    MB_METACHARINIT();
+			    while (m > 0) {
+				cl = MB_METACHARLENCONV(postmul, &cchar);
+				m -= WCWIDTH(cchar);
+				while (cl--)
+				    *r++ = *postmul++;
+			    }
 			}
 		    }
 		}
@@ -983,37 +987,39 @@
 		     * first
 		     */
 		    f -= lpreone;
-		    if ((m = f % lpremul)) {
-			/*
-			 * Some fraction of the repeated string needed.
-			 */
-			/* Need this much... */
-			c = m;
-			/* ...skipping this much first. */
-			m = lpremul - m;
-			MB_METACHARINIT();
-			for (t = premul; m > 0; ) {
-			    t += MB_METACHARLENCONV(t, &cchar);
-			    m -= WCWIDTH(cchar);
-			}
-			/* Now the rest of the repeated string. */
-			while (c > 0) {
-			    cl = MB_METACHARLENCONV(t, &cchar);
-			    while (cl--)
-				*r++ = *t++;
-			    c -= WCWIDTH(cchar);
+		    if (lpremul) {
+			if ((m = f % lpremul)) {
+			    /*
+			     * Some fraction of the repeated string needed.
+			     */
+			    /* Need this much... */
+			    c = m;
+			    /* ...skipping this much first. */
+			    m = lpremul - m;
+			    MB_METACHARINIT();
+			    for (t = premul; m > 0; ) {
+				t += MB_METACHARLENCONV(t, &cchar);
+				m -= WCWIDTH(cchar);
+			    }
+			    /* Now the rest of the repeated string. */
+			    while (c > 0) {
+				cl = MB_METACHARLENCONV(t, &cchar);
+				while (cl--)
+				    *r++ = *t++;
+				c -= WCWIDTH(cchar);
+			    }
 			}
-		    }
-		    for (cc = f / lpremul; cc--;) {
-			/*
-			 * Repeat the repeated string.
-			 */
-			MB_METACHARINIT();
-			for (c = lpremul, t = premul; c > 0; ) {
-			    cl = MB_METACHARLENCONV(t, &cchar);
-			    while (cl--)
-				*r++ = *t++;
-			    c -= WCWIDTH(cchar);
+			for (cc = f / lpremul; cc--;) {
+			    /*
+			     * Repeat the repeated string.
+			     */
+			    MB_METACHARINIT();
+			    for (c = lpremul, t = premul; c > 0; ) {
+				cl = MB_METACHARLENCONV(t, &cchar);
+				while (cl--)
+				    *r++ = *t++;
+				c -= WCWIDTH(cchar);
+			    }
 			}
 		    }
 		    if (preone) {
@@ -1089,27 +1095,29 @@
 			c -= WCWIDTH(cchar);
 		    }
 		}
-		/* Repeat the repeated string */
-		for (cc = f / lpostmul; cc--;) {
-		    MB_METACHARINIT();
-		    for (c = lpostmul, t = postmul; *t; ) {
-			cl = MB_METACHARLENCONV(t, &cchar);
-			while (cl--)
-			    *r++ = *t++;
-			c -= WCWIDTH(cchar);
+		if (lpostmul) {
+		    /* Repeat the repeated string */
+		    for (cc = f / lpostmul; cc--;) {
+			MB_METACHARINIT();
+			for (c = lpostmul, t = postmul; *t; ) {
+			    cl = MB_METACHARLENCONV(t, &cchar);
+			    while (cl--)
+				*r++ = *t++;
+			    c -= WCWIDTH(cchar);
+			}
 		    }
-		}
-		/*
-		 * See if there's any fraction of the repeated
-		 * string needed to fill up the remaining space.
-		 */
-		if ((m = f % lpostmul)) {
-		    MB_METACHARINIT();
-		    while (m > 0) {
-			cl = MB_METACHARLENCONV(postmul, &cchar);
-			while (cl--)
-			    *r++ = *postmul++;
-			m -= WCWIDTH(cchar);
+		    /*
+		     * See if there's any fraction of the repeated
+		     * string needed to fill up the remaining space.
+		     */
+		    if ((m = f % lpostmul)) {
+			MB_METACHARINIT();
+			while (m > 0) {
+			    cl = MB_METACHARLENCONV(postmul, &cchar);
+			    while (cl--)
+				*r++ = *postmul++;
+			    m -= WCWIDTH(cchar);
+			}
 		    }
 		}
 	    }
Index: Src/utils.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/utils.c,v
retrieving revision 1.137
diff -u -r1.137 utils.c
--- Src/utils.c	13 Sep 2006 20:55:30 -0000	1.137
+++ Src/utils.c	15 Sep 2006 13:00:30 -0000
@@ -527,8 +527,10 @@
     if (widthp) {
 	int wcw = wcwidth(c);
 	*widthp = (s - buf);
-	if (wcw > 0)
+	if (wcw >= 0)
 	    *widthp += wcw;
+	else
+	    (*widthp)++;
     }
     if (swidep)
 	*swidep = s;
@@ -550,12 +552,12 @@
 {
     int wcw;
     /* assume a single-byte character if not valid */
-    if (wc == WEOF)
+    if (wc == WEOF || unset(MULTIBYTE))
 	return 1;
     wcw = wcwidth(wc);
-    /* if not printable, assume zero width */
-    if (wcw <= 0)
-	return 0;
+    /* if not printable, assume width 1 */
+    if (wcw < 0)
+	return 1;
     return wcw;
 }
 
@@ -4077,12 +4079,14 @@
 		num++;
 	    } else if (width) {
 		/*
-		 * Returns -1 if not a printable character; best
-		 * just to ignore these.
+		 * Returns -1 if not a printable character.  We
+		 * turn this into 1 for backward compatibility.
 		 */
 		int wcw = wcwidth(wc);
-		if (wcw > 0)
+		if (wcw >= 0)
 		    num += wcw;
+		else
+		    num++;
 	    } else
 		num++;
 	    laststart = ptr;

-- 
Peter Stephenson <pws@xxxxxxx>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


To access the latest news from CSR copy this link into a web browser:  http://www.csr.com/email_sig.php



Messages sorted by: Reverse Date, Date, Thread, Author