Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: pfxlen()



This should fix pfxlen(), which is used in a few places in zle to find
common prefixes, so that it doesn't stop in the middle of a multibyte
character.  It's actually quite hard to exercise the function, so the
change is not particularly well tested.

As pfxlen() is likely to keep operating on multibyte strings while
wpfxlen() operates on wide characters, I've removed Andrey's TODO
comments about merging the two.

Index: Src/Zle/zle_refresh.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_refresh.c,v
retrieving revision 1.40
diff -u -r1.40 zle_refresh.c
--- Src/Zle/zle_refresh.c	1 Nov 2005 04:02:02 -0000	1.40
+++ Src/Zle/zle_refresh.c	1 Nov 2005 22:27:08 -0000
@@ -934,7 +934,6 @@
 #define tc_upcurs(X)	(void) tcmultout(TCUP, TCMULTUP, (X))
 #define tc_leftcurs(X)	(void) tcmultout(TCLEFT, TCMULTLEFT, (X))
 
-/* TODO remove it when pfxlen is fixed */
 static int
 wpfxlen(REFRESH_STRING s, REFRESH_STRING t)
 {
@@ -1143,7 +1142,6 @@
 		   makes it cheaper to delete intermediate characters
 		   eg. oldline: hifoobar \ hopefully cheaper here to delete two
 		   newline: foobar	 / characters, then we have six matches */
-		/* TODO replace wpfxlen back with pfxlen when the latter is fixed */
 		if (tccan(TCDEL)) {
 		    for (i = 1; *(ol + i); i++)
 			if (tcdelcost(i) < wpfxlen(ol + i, nl)) {
Index: Src/Zle/zle_tricky.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_tricky.c,v
retrieving revision 1.59
diff -u -r1.59 zle_tricky.c
--- Src/Zle/zle_tricky.c	1 Nov 2005 03:26:56 -0000	1.59
+++ Src/Zle/zle_tricky.c	1 Nov 2005 22:27:09 -0000
@@ -1899,7 +1899,12 @@
     return runhookdef(COMPLETEHOOK, (void *) &dat);
 }
 
-/* Return the length of the common prefix of s and t. */
+/*
+ * Return the length of the common prefix of s and t.
+ * s and t are both metafied; the length returned is a raw byte count
+ * into both strings, excluding any common bytes that form less than
+ * a complete wide character.
+ */
 
 /**/
 mod_export int
@@ -1907,9 +1912,46 @@
 {
     int i = 0;
 
+#ifdef MULTIBYTE_SUPPORT
+    wchar_t wc;
+    mbstate_t ps;
+    int ret, lasti = 0;
+    char inc;
+
+    memset(&ps, 0, sizeof(mbstate_t));
+    while (*s) {
+	if (*s == Meta) {
+	    if (*t != Meta || t[1] != s[1])
+		break;
+	    inc = s[1] ^ 32;
+	    i += 2;
+	    s += 2;
+	    t += 2;
+	} else {
+	    if (*s != *t)
+		break;
+	    inc = *s;
+	    i++;
+	    s++;
+	    t++;
+	}
+
+	ret = mbrtowc(&wc, &inc, 1, &ps);
+	if (ret == -1) {
+	    /* error */
+	    break;
+	} else if (ret >= 0) {
+	    /* successfully found complete character, record position */
+	    lasti = i;
+	}
+	/* Otherwise, not found a complete character: keep trying. */
+    }
+    return lasti;
+#else
     while (*s && *s == *t)
 	s++, t++, i++;
     return i;
+#endif
 }
 
 /* Return the length of the common suffix of s and t. */

-- 
Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx>
Web page still at http://www.pwstephenson.fsnet.co.uk/



Messages sorted by: Reverse Date, Date, Thread, Author