Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Zle widgets for vi-mode word motion: patch and a question



The attached patch will fix a multibyte-related bug in a few
Zle widgets (see [1] below), but it will also introduce an
incompatibility with the current behavior of vi-backward-word-end
(see [2]). I don't understand why vi-backward-word-end currently
behaves this way, and the patch will actually increase
compatibility with the vim editor (/usr/bin/vim).

Is it OK to change the behavior of vi-backward-word-end?

[1] A problem with the Zle widgets for vi-mode word motion
   (vi-forward-word, vi-backward-word, etc.)

For example, in the following command line (assume 'bindkey -v'):

zsh% ls /ab/cd/ef

if the cursor is on the 1st '/', hitting 'w' (vi-forward-word)
repeatedly will move the cursor to 'a', '/', 'c', '/', 'e', 'f';
this is the expected behavior. But in the following case:

zsh% ls /あい/うえ/おか

if the cursor is on the 1st '/', hitting a single 'w' will move
the cursor to the last 'か'. This is because all non-blank characters
for which Z_vialnum(X) returns false are considered to be in the same
class, so that '/あい/うえ/おか' is just a single word.

In the attached patch, I moved the function wordclass() from
textobjects.c to zle_word.c and modified it so that it recognizes
four character classes: blank (0), vialnum (1), punctuation (2),
and all others (3).

[2] Current (strange?) behavior of vi-backward-word-end

Suppose we have the following 5-line command line (no multibyte
characters but two blank lines):

-------------
zsh% echo abc

=+/

def
-------------
(you need to type 'echo abc<ESC>o<ESC>o=+/<ESC>o<ESC>odef<ESC>')

If the cursor is on the 'f' of the last line, hitting 'ge' (bound
to vi-backward-word-end) will move the cursor to the beginning of
the previous blank line, and another 'ge' will move it to the '/'
of the third line; these are as expected.

But one more 'ge' will move the cursor to the 'c' on the first
line, skipping a blank line.
This seems to happen when the cursor is on a word made up only
of non-alphanumeric characters (such as '/', '=', etc.).
The code for this widget is vibackwardwordend() in zle_word.c,
but it is very hard for me to understand.

In the vim editor, 'ge' never skips blank lines.
vim's help document (type ':help ge<CR>' in vim) explicitly says
"An empty line is also considered to be a WORD".

At first I thought this was just a bug in the current zsh, but
X02zlevi.ztst explicitly tests this behavior (line 466 and below),
so it may be intentional. Is it OK to change this?



diff --git a/Src/Zle/textobjects.c b/Src/Zle/textobjects.c
index bf83906f2..c93777b65 100644
--- a/Src/Zle/textobjects.c
+++ b/Src/Zle/textobjects.c
@@ -30,13 +30,6 @@
 #include "zle.mdh"
 #include "textobjects.pro"
 
-/* class of character: 0 is whitespace, 1 is word character, 2 is other */
-static int
-wordclass(ZLE_CHAR_T x)
-{
-    return (ZC_iblank(x) ? 0 : ((ZC_ialnum(x) || (ZWC('_') == x)) ? 1 : 2));
-}
-
 static int
 blankwordclass(ZLE_CHAR_T x)
 {
diff --git a/Src/Zle/zle.h b/Src/Zle/zle.h
index 07b310180..8261da92b 100644
--- a/Src/Zle/zle.h
+++ b/Src/Zle/zle.h
@@ -67,6 +67,7 @@ typedef wint_t   ZLE_INT_T;
 #define ZC_inblank iswspace
 #define ZC_iupper iswupper
 #define ZC_iword(x) wcsitype((x), IWORD)
+#define ZC_ipunct iswpunct
 
 #define ZC_tolower towlower
 #define ZC_toupper towupper
@@ -153,6 +154,7 @@ static inline int ZS_strncmp(ZLE_STRING_T s1, ZLE_STRING_T s2, size_t l)
 #define ZC_inblank inblank
 #define ZC_iupper isupper
 #define ZC_iword iword
+#define ZC_ipunct ispunct
 
 #define ZC_tolower tulower
 #define ZC_toupper tuupper
diff --git a/Src/Zle/zle_word.c b/Src/Zle/zle_word.c
index e4a878eab..4910d765b 100644
--- a/Src/Zle/zle_word.c
+++ b/Src/Zle/zle_word.c
@@ -64,7 +64,18 @@ forwardword(char **args)
     return 0;
 }
 
-#define Z_vialnum(X) (ZC_ialnum(X) || (ZWC('_') == X))
+/*
+ * class of character (for vi-mode word motion)
+ * 0: blank,  1: alnum or _,  2: punctuation,  3: the others
+ */
+
+/**/
+int
+wordclass(ZLE_CHAR_T x)
+{
+    return (ZC_iblank(x) ? 0 : ((ZC_ialnum(x) || (ZWC('_') == x)) ? 1 :
+		ZC_ipunct(x) ? 2 : 3));
+}
 
 /**/
 int
@@ -81,13 +92,10 @@ viforwardword(char **args)
     }
     while (n--) {
 	int nl;
-	if (Z_vialnum(zleline[zlecs]))
-	    while (zlecs != zlell && Z_vialnum(zleline[zlecs]))
-		INCCS();
-	else
-	    while (zlecs != zlell && !Z_vialnum(zleline[zlecs]) &&
-		    !ZC_inblank(zleline[zlecs]))
-		INCCS();
+	int cc = wordclass(zleline[zlecs]);
+	while (zlecs != zlell && wordclass(zleline[zlecs]) == cc) {
+	    INCCS();
+	}
 	if (wordflag && !n)
 	    return 0;
 	nl = (zleline[zlecs] == ZWC('\n'));
@@ -208,26 +216,17 @@ viforwardwordend(char **args)
 	    zlecs = pos;
 	}
 	if (zlecs != zlell) {
+	    int cc;
 	    pos = zlecs;
 	    INCPOS(pos);
-	    if (Z_vialnum(zleline[pos])) {
-		for (;;) {
-		    zlecs = pos;
-		    if (zlecs == zlell)
+	    cc = wordclass(zleline[pos]);
+	    for (;;) {
+		zlecs = pos;
+		if (zlecs == zlell)
+		    break;
+		INCPOS(pos);
+		if (wordclass(zleline[pos]) != cc)
 			break;
-		    INCPOS(pos);
-		    if (!Z_vialnum(zleline[pos]))
-			break;
-		}
-	    } else {
-		for (;;) {
-		    zlecs = pos;
-		    if (zlecs == zlell)
-			break;
-		    INCPOS(pos);
-		    if (Z_vialnum(zleline[pos]) || ZC_inblank(zleline[pos]))
-			break;
-		}
 	    }
 	}
     }
@@ -295,24 +294,14 @@ vibackwardword(char **args)
 	}
 	if (zlecs) {
 	    int pos = zlecs;
-	    if (Z_vialnum(zleline[pos])) {
-		for (;;) {
-		    zlecs = pos;
-		    if (zlecs == 0)
-			break;
-		    DECPOS(pos);
-		    if (!Z_vialnum(zleline[pos]))
-			break;
-		}
-	    } else {
-		for (;;) {
-		    zlecs = pos;
-		    if (zlecs == 0)
-			break;
-		    DECPOS(pos);
-		    if (Z_vialnum(zleline[pos]) || ZC_inblank(zleline[pos]))
-			break;
-		}
+	    int cc = wordclass(zleline[pos]);
+	    for (;;) {
+		zlecs = pos;
+		if (zlecs == 0)
+		    break;
+		DECPOS(pos);
+		if (wordclass(zleline[pos]) != cc || ZC_inblank(zleline[pos]))
+		    break;
 	    }
 	}
     }
@@ -368,17 +357,10 @@ vibackwardwordend(char **args)
 	return ret;
     }
     while (n-- && zlecs > 1) {
-	int start = 0;
-	if (Z_vialnum(zleline[zlecs]))
-	    start = 1;
-	else if (!ZC_inblank(zleline[zlecs]))
-	    start = 2;
+	int cc = wordclass(zleline[zlecs]);
 	DECCS();
 	while (zlecs) {
-	    int same = (start != 1) && ZC_iblank(zleline[zlecs]);
-	    if (start)
-		same |= Z_vialnum(zleline[zlecs]);
-	    if (same == (start == 2))
+	    if (wordclass(zleline[zlecs]) != cc || ZC_iblank(zleline[zlecs]))
 		break;
 	    DECCS();
 	}
@@ -494,26 +476,17 @@ vibackwardkillword(UNUSED(char **args))
 	    x = pos;
 	}
 	if (x > lim) {
+	    int cc;
 	    int pos = x;
 	    DECPOS(pos);
-	    if (Z_vialnum(zleline[pos])) {
-		for (;;) {
-		    x = pos;
-		    if (x <= lim)
-			break;
-		    DECPOS(pos);
-		    if (!Z_vialnum(zleline[pos]))
-			break;
-		}
-	    } else {
-		for (;;) {
-		    x = pos;
-		    if (x <= lim)
-			break;
-		    DECPOS(pos);
-		    if (Z_vialnum(zleline[pos]) || ZC_iblank(zleline[pos]))
-			break;
-		}
+	    cc = wordclass(zleline[pos]);
+	    for (;;) {
+		x = pos;
+		if (x < lim)
+		    break;
+		DECPOS(pos);
+		if (wordclass(zleline[pos]) != cc)
+		    break;
 	    }
 	}
     }
diff --git a/Test/X02zlevi.ztst b/Test/X02zlevi.ztst
index d3b533490..4e7966e12 100644
--- a/Test/X02zlevi.ztst
+++ b/Test/X02zlevi.ztst
@@ -1,6 +1,16 @@
 # Tests of the vi mode of ZLE
 
 %prep
+  unset -m LC_\*
+  ZSH_TEST_LANG=
+  langs=(en_{US,GB}.{UTF-,utf}8 en.UTF-8
+	 $(locale -a 2>/dev/null | egrep 'utf8|UTF-8'))
+  for LANG in $langs; do
+    if [[ é = ? ]]; then
+      ZSH_TEST_LANG=$LANG 
+      break;
+    fi
+  done
   if [[ $OSTYPE = cygwin ]]; then
     ZTST_unimplemented="the zsh/zpty module does not work on Cygwin"
   elif ( zmodload zsh/zpty 2>/dev/null ); then
@@ -463,12 +473,12 @@
 >  aww
 >CURSOR: 0
 
-  zletest $' --ww  ww--\eo\eoww\eo\eo--\eo\eo  ww\e' gei{a,=,b,c,=,d,e,=,f}$'\e'
+  zletest $' --ww  ww--\eo\eoww\eo\eo--\eo\eo  ww\e' gei{a,=,b,c,d,=,e,f,=,g}$'\e'
 0:backward word end
->BUFFER: f -=-wew  wdw-=-
->c
->wbw
->
+>BUFFER: g -=-wfw  wew-=-
+>d
+>wcw
+>b
 >-=-
 >a
 >  ww
@@ -529,6 +539,15 @@
 >  wwe
 >CURSOR: 29
 
+  if [[ -z $ZSH_TEST_LANG ]]; then
+    ZTST_skip="no UTF-8 locale for Zle vi-mode test"
+  else
+    zletest $'/あいう/えお/かき\ebxgegex0wxex'
+  fi
+0:word motion with multibyte characters
+>BUFFER: /い/え/き
+>CURSOR: 2
+
   zletest $'    ----word    ----    word    word----    ----\e42|daw30|daw22|daw14|daw2|daw'
 0:delete all word on blanks
 >BUFFER: word






Messages sorted by: Reverse Date, Date, Thread, Author