Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: mbrtowc/wcrtomb rationalisation



This rationalises the code so that it always uses the restartable forms
of multibyte to wide character conversion and vice versa (mbrtowc() and
wcrtomb()).  This protects us against an odd state being left behind in
the library's internal shift state.  (It doesn't make the code
interrupt-safe, since I'm using a global shift state, but most of the
shell's code isn't anyway so we already have workarounds.)

There's one exception:  in the \u/\U getkeystring() code I've kept the
wctomb() call for compatibility, since this code predates the multibyte
support and doesn't depend on having the complete library.

I also obsessively pruned some trailing whitespace.

Index: ChangeLog
===================================================================
RCS file: /cvsroot/zsh/zsh/ChangeLog,v
retrieving revision 1.3199
diff -u -r1.3199 ChangeLog
--- ChangeLog	3 Aug 2006 10:19:20 -0000	1.3199
+++ ChangeLog	3 Aug 2006 14:58:04 -0000
@@ -58,7 +58,7 @@
 	accept-and-menu-complete, perhaps.
 
 	* 22562: Src/glob.c, Test/D07multibyte.ztst: make ${...#...} etc.
-	understand multibyte characters.	
+	understand multibyte characters.
 
 2006-07-29  Barton E. Schaefer  <schaefer@xxxxxxx>
 
Index: Src/prompt.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/prompt.c,v
retrieving revision 1.38
diff -u -r1.38 prompt.c
--- Src/prompt.c	9 Jul 2006 14:47:22 -0000	1.38
+++ Src/prompt.c	3 Aug 2006 14:58:06 -0000
@@ -771,6 +771,7 @@
 	    /* FALL THROUGH */
 	default:
 	    /* Take full wide character in one go */
+	    mb_metacharinit();
 	    pc = wcs_nicechar(cc, NULL, NULL);
 	    break;
 	}
Index: Src/utils.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/utils.c,v
retrieving revision 1.130
diff -u -r1.130 utils.c
--- Src/utils.c	1 Aug 2006 21:28:04 -0000	1.130
+++ Src/utils.c	3 Aug 2006 14:58:08 -0000
@@ -279,6 +279,7 @@
 	    case 'c':
 		num = va_arg(ap, int);
 #ifdef MULTIBYTE_SUPPORT
+		mb_metacharinit();
 		zputs(wcs_nicechar(num, NULL, NULL), stderr);
 #else
 		zputs(nicechar(num), stderr);
@@ -402,6 +403,20 @@
 
 /**/
 #ifdef MULTIBYTE_SUPPORT
+static mbstate_t mb_shiftstate;
+
+/*
+ * Initialise multibyte state: called before a sequence of
+ * wcs_nicechar() or mb_metacharlenconv().
+ */
+
+/**/
+mod_export void
+mb_metacharinit(void)
+{
+    memset(&mb_shiftstate, 0, sizeof(mb_shiftstate));
+}
+
 /*
  * The number of bytes we need to allocate for a "nice" representation
  * of a multibyte character.
@@ -430,6 +445,9 @@
  * Either the initial ASCII part or the wide character part may be empty
  * (but not both).  (Note the complication that the wide character
  * part may contain metafied characters.)
+ *
+ * The caller needs to call mb_metacharinit() before the first call, to
+ * set up the multibyte shift state for a range of characters.
  */
 
 /**/
@@ -475,8 +493,11 @@
 	}
     }
 
-    if (ret == -1 ||
-	(ret = wctomb(mbstr, c)) == -1) {
+    if (ret != -1)
+	ret = wcrtomb(mbstr, c, &mb_shiftstate);
+
+    if (ret == -1) {
+	memset(&mb_shiftstate, 0, sizeof(mb_shiftstate));
 	/*
 	 * Can't or don't want to convert character: use UCS-2 or
 	 * UCS-4 code in print escape format.
@@ -513,18 +534,6 @@
 /**/
 #endif /* MULTIBYTE_SUPPORT */
 
-
-/* Output a string's visible representation. */
-
-#if 0 /**/
-void
-nicefputs(char *s, FILE *f)
-{
-    for (; *s; s++)
-	zputs(nicechar(*s), f);
-}
-#endif
-
 /* get a symlink-free pathname for s relative to PWD */
 
 /**/
@@ -699,7 +708,7 @@
 #ifdef HAVE_GETPWUID
     struct passwd *pswd;
     uid_t current_uid;
- 
+
     current_uid = getuid();
     if (current_uid != cached_uid) {
 	cached_uid = current_uid;
@@ -900,12 +909,12 @@
 mod_export LinkList prepromptfns;
 
 /* the last time we checked mail */
- 
+
 /**/
 time_t lastmailcheck;
- 
+
 /* the last time we checked the people in the WATCH variable */
- 
+
 /**/
 time_t lastwatch;
 
@@ -2818,6 +2827,7 @@
 wcsitype(wchar_t c, int itype)
 {
     int len;
+    mbstate_t mbs;
     VARARR(char, outstr, MB_CUR_MAX);
 
     if (!isset(MULTIBYTE))
@@ -2830,7 +2840,8 @@
      * If it doesn't, use iswalnum on the original character.
      * If that fails, resort to the appropriate wide character array.
      */
-    len = wctomb(outstr, c);
+    memset(&mbs, 0, sizeof(mbs));
+    len = wcrtomb(outstr, c, &mbs);
 
     if (len == 0) {
 	/* NULL is special */
@@ -3725,6 +3736,7 @@
     ptr = unmetafy(ums, &umlen);
 
     memset(&mbs, 0, sizeof mbs);
+    mb_metacharinit();
     while (umlen > 0) {
 	size_t cnt = eol ? MB_INVALID : mbrtowc(&c, ptr, umlen, &mbs);
 
@@ -3853,20 +3865,6 @@
     return width;
 }
 
-static mbstate_t mb_shiftstate;
-
-/*
- * Initialise multibyte state: called before a sequence of
- * mb_metacharlenconv().
- */
-
-/**/
-void
-mb_metacharinit(void)
-{
-    memset(&mb_shiftstate, 0, sizeof(mb_shiftstate));
-}
-
 /*
  * Length of metafied string s which contains the next multibyte
  * character; single (possibly metafied) character if string is not null
Index: Src/Zle/complist.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/complist.c,v
retrieving revision 1.84
diff -u -r1.84 complist.c
--- Src/Zle/complist.c	30 May 2006 22:35:04 -0000	1.84
+++ Src/Zle/complist.c	3 Aug 2006 14:58:10 -0000
@@ -588,6 +588,7 @@
     if (colors)
 	initiscol(colors);
 
+    mb_metacharinit();
     while (umleft > 0) {
 	size_t cnt = eol ? MB_INVALID : mbrtowc(&cc, uptr, umleft, &mbs);
 
@@ -1964,7 +1965,9 @@
 #ifdef MULTIBYTE_SUPPORT
 	if (lastchar_wide_valid)
 	{
-	    int len = wctomb(s, lastchar_wide);
+	    mbstate_t mbs;
+	    memset(&mbs, 0, sizeof(mbs));
+	    int len = wcrtomb(s, lastchar_wide, &mbs);
 	    if (len < 0)
 		len = 0;
 	    s[len] = '\0';
Index: Src/Zle/zle.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle.h,v
retrieving revision 1.32
diff -u -r1.32 zle.h
--- Src/Zle/zle.h	1 Aug 2006 21:28:04 -0000	1.32
+++ Src/Zle/zle.h	3 Aug 2006 14:58:10 -0000
@@ -71,8 +71,6 @@
 #define ZC_tolower towlower
 #define ZC_toupper towupper
 
-#define ZC_nicechar(c) wcs_nicechar(c, NULL, NULL)
-
 #define LASTFULLCHAR	lastchar_wide
 
 #else  /* Not MULTIBYTE_SUPPORT: old single-byte code */
@@ -100,8 +98,6 @@
  */
 #define ZMB_nicewidth	niceztrlen
 
-#define ZC_nicechar nicechar
-
 #ifdef __GNUC__
 static inline size_t ZS_strlen(ZLE_STRING_T s)
 { return strlen((char*)s); }
Index: Src/Zle/zle_tricky.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_tricky.c,v
retrieving revision 1.69
diff -u -r1.69 zle_tricky.c
--- Src/Zle/zle_tricky.c	1 Aug 2006 21:28:04 -0000	1.69
+++ Src/Zle/zle_tricky.c	3 Aug 2006 14:58:11 -0000
@@ -2464,16 +2464,20 @@
     ZLE_STRING_T bangq;
     ZLE_CHAR_T zlebangchar[1];
     int ret;
+#ifdef MULTIBYTE_SUPPORT
+    mbstate_t mbs;
+#endif
+
     fixmagicspace();
 
 #ifdef MULTIBYTE_SUPPORT
     /*
-     * TODO: bangchar should really be a multibyte string representing
-     * a single character, since there's no fundamental reason why
-     * it shouldn't be a Unicode character.  In practice this is
-     * very minor, however.
+     * Use mbrtowc() here for consistency and to ensure the
+     * state is initialised properly.  bangchar is unsigned char,
+     * but must be ASCII, so we simply cast the pointer.
      */
-    if (mbtowc(zlebangchar, (char *)&bangchar, 1) < 0)
+    memset(&mbs, 0, sizeof(mbs));
+    if (mbrtowc(zlebangchar, (char *)&bangchar, 1, &mbs) < 0)
 	return selfinsert(args);
 #else
     zlebangchar[0] = bangchar;
Index: Src/Zle/zle_utils.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_utils.c,v
retrieving revision 1.38
diff -u -r1.38 zle_utils.c
--- Src/Zle/zle_utils.c	1 Aug 2006 21:28:04 -0000	1.38
+++ Src/Zle/zle_utils.c	3 Aug 2006 14:58:11 -0000
@@ -135,17 +135,20 @@
     char *s;
     int i, j;
     size_t mb_len = 0;
+    mbstate_t mbs;
 
     s = zalloc(inll * MB_CUR_MAX + 1);
 
     outcs = 0;
+    memset(&mbs, 0, sizeof(mbs));
     for (i=0; i < inll; i++, incs--) {
 	if (incs == 0)
 	    outcs = mb_len;
-	j = wctomb(s + mb_len, instr[i]);
+	j = wcrtomb(s + mb_len, instr[i], &mbs);
 	if (j == -1) {
 	    /* invalid char; what to do? */
 	    s[mb_len++] = ZWC('?');
+	    memset(&mbs, 0, sizeof(mbs));
 	} else {
 	    mb_len += j;
 	}
@@ -780,6 +783,7 @@
     p = unmetafy(umsg, &ulen);
     memset(&mbs, 0, sizeof mbs);
 
+    mb_metacharinit();
     while (ulen > 0) {
 	char const *n;
 	if (*p == '\n') {

-- 
Peter Stephenson <pws@xxxxxxx>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


To access the latest news from CSR copy this link into a web browser:  http://www.csr.com/email_sig.php



Messages sorted by: Reverse Date, Date, Thread, Author