Zsh Mailing List Archive Messages sorted by: Reverse Date, Date, Thread, Author

Re: Camel case word navigation?

X-seq: zsh-users 12987
From: Peter Stephenson <pws@xxxxxxx>
To: zsh-users@xxxxxxxxxx
Subject: Re: Camel case word navigation?
Date: Tue, 24 Jun 2008 12:14:49 +0100
In-reply-to: <200806230916.m5N9GH2L003761@xxxxxxxxxxxxxx>
Mailing-list: contact zsh-users-help@xxxxxxxxxx; run by ezmlm
Organization: CSR
References: <54c0a1810806222241t47f04d53lf2780f48fc7ee2d2@xxxxxxxxxxxxxx> <200806230916.m5N9GH2L003761@xxxxxxxxxxxxxx>

On Mon, 23 Jun 2008 10:16:17 +0100
Peter Stephenson <pws@xxxxxxx> wrote:
> dackz wrote:
> > Does anyone know of a simple way to make zsh match camel case words?
> > I.e., where fooBar^W would become foo (like c-subword-mode in GNU
> > Emacs).
> > 
> > So far all I can think of is that maybe I could do it by customizing
> > match-words-by-style and using select-word-style, but I'm not quite
> > sure where to start with that.
> 
> I don't see a fundamental problem with that but it might be fiddly to get
> right.

As predicted.

Index: Doc/Zsh/contrib.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/contrib.yo,v
retrieving revision 1.81
diff -u -r1.81 contrib.yo
--- Doc/Zsh/contrib.yo	23 Jun 2008 13:34:53 -0000	1.81
+++ Doc/Zsh/contrib.yo	24 Jun 2008 11:11:02 -0000
@@ -459,6 +459,12 @@
 )
 enditem()
 
+All but `tt(default)' can be input as an upper case character, which has
+the same effect but with subword matching turned on.  In this case, words
+with upper case characters are treated specially: each separate run of
+upper case characters, or an upper case character followed by any number of
+other characters, is considered a word.
+
 More control can be obtained using the tt(zstyle) command, as described in
 ifzman(zmanref(zshmodules))\
 ifnzman(noderef(The zsh/zutil Module)).  Each style is looked up in the
@@ -507,6 +513,9 @@
 including `tt(])', `tt(^)' and `tt(-)' as these are special inside
 character classes.
 
+tt(word-style) may also have `tt(-subword)' appended to its value to
+turn on subword matching, as described above.
+
 The style tt(skip-chars) is mostly useful for
 tt(transpose-words) and similar functions.  If set, it gives a count of
 characters starting at the cursor position which will not be considered
Index: Functions/Zle/match-words-by-style
===================================================================
RCS file: /cvsroot/zsh/zsh/Functions/Zle/match-words-by-style,v
retrieving revision 1.6
diff -u -r1.6 match-words-by-style
--- Functions/Zle/match-words-by-style	14 Aug 2006 16:14:41 -0000	1.6
+++ Functions/Zle/match-words-by-style	24 Jun 2008 11:11:02 -0000
@@ -105,7 +105,7 @@
 [[ -z $skip ]] && skip=0
 
 case $wordstyle in
-  (shell) local bufwords
+  (*shell*) local bufwords
 	  # This splits the line into words as the shell understands them.
 	  bufwords=(${(z)LBUFFER})
 	  nwords=${#bufwords}
@@ -133,7 +133,7 @@
 	    wordpat2=${(q)wordpat2}
 	  fi
 	  ;;
-  (*space) spacepat='[[:space:]]#'
+  (*space*) spacepat='[[:space:]]#'
            wordpat1='[^[:space:]]##'
 	   wordpat2=$wordpat1
 	   ;;
@@ -160,8 +160,8 @@
       fi
       # Quote $wc where necessary, because we don't want those
       # characters to be considered as pattern characters later on.
-      if [[ $wordstyle = *specified ]]; then
-        if [[ $wordstyle != un* ]]; then
+      if [[ $wordstyle = *specified* ]]; then
+        if [[ $wordstyle != *unspecified* ]]; then
 	  # The given set of characters are the word characters, nothing else
 	  wordpat1="[${wc}]##"
 	  # anything else is a space.
@@ -189,6 +189,28 @@
 word1=$match[1]
 ws1=$match[2]
 
+if [[ $wordstyle = *subword* ]]; then
+  # The rule here is that a word boundary may be an upper case letter
+  # followed by a lower case letter, or an upper case letter at
+  # the start of a group of upper case letters.  To make
+  # it easier to be consistent, we just use anything that
+  # isn't an upper case character instead of a lower case
+  # character.
+  # Here the initial "*" will match greedily, so we get the
+  # last such match, as we want.
+  integer epos
+  if [[ $word1 = (#b)(*)([[:upper:]][^[:upper:]]*) ]]; then
+    (( epos = ${#match[1]} ))
+  fi
+  if [[ $word1 = (#b)(*[^[:upper:]])([[:upper:]]*) ]]; then
+    (( ${#match[1]} > epos ))  &&  (( epos = ${#match[1]} ))
+  fi
+  if (( epos > 0 )); then
+    pat1+=$word1[1,epos]
+    word1=$word1[epos+1,-1]
+  fi
+fi
+
 match=()
 charskip=
 repeat $skip charskip+=\?
@@ -200,4 +222,30 @@
 word2=$match[2]
 ws3=$match[3]
 
+if [[ $wordstyle = *subword* ]]; then
+  # Do we have a group of upper case characters at the start
+  # of word2 (that don't form the entire word)?
+  # Again, rely on greedy matching of first pattern.
+  if [[ $word2 = (#b)([[:upper:]][[:upper:]]##)(*) && -n $match[2] ]]; then
+    # Yes, so the last one is a new word boundary.
+    (( epos = ${#match[1]} - 1 ))
+    # Otherwise, do we have upper followed by non-upper not
+    # at the start?  Ignore the initial character, we already
+    # know it's a word boundary so it can be an upper case character
+    # if it wants.
+  elif [[ $word2 = (#b)(?[^[:upper:]]##)[[:upper:]]* ]]; then
+    (( epos = ${#match[1]} ))
+  else
+    (( epos = 0 ))
+  fi
+  if (( epos )); then
+    # Careful: if we matched a subword there's no whitespace immediately
+    # after the matched word, so ws3 should be empty and any existing
+    # value tacked onto pat2.
+    pat2="${word2[epos+1,-1]}$ws3$pat2"
+    ws3=
+    word2=$word2[1,epos]
+  fi
+fi
+
 matched_words=("$pat1" "$word1" "$ws1" "$ws2" "$word2" "$ws3" "$pat2")
Index: Functions/Zle/select-word-style
===================================================================
RCS file: /cvsroot/zsh/zsh/Functions/Zle/select-word-style,v
retrieving revision 1.2
diff -u -r1.2 select-word-style
--- Functions/Zle/select-word-style	3 Apr 2003 09:58:17 -0000	1.2
+++ Functions/Zle/select-word-style	24 Jun 2008 11:11:02 -0000
@@ -10,7 +10,7 @@
 
 [[ -z $1 ]] && autoload read-from-minibuffer
 
-local REPLY detail f
+local REPLY detail f wordstyle
 
 if ! zle -l $word_functions[1]; then
     for f in $word_functions; do
@@ -25,6 +25,7 @@
     if [[ -n $WIDGET && -z $1 ]]; then
 	read-from-minibuffer -k1 "Word styles (hit return for more detail):
 (b)ash (n)ormal (s)hell (w)hitespace (d)efault (q)uit
+(B), (N), (S), (W) as above with subword matching
 ${detail}? " || return 1
     else
 	REPLY=$1
@@ -33,31 +34,31 @@
     detail=
 
     case $REPLY in
-	(b*)
+	([bB]*)
 	# bash style
-	zstyle ':zle:*' word-style standard
+	wordstyle=standard
 	zstyle ':zle:*' word-chars ''
 	;;
 
-	(n*)
+	([nN]*)
 	# normal zsh style
-	zstyle ':zle:*' word-style standard
+	wordstyle=standard
 	zstyle ':zle:*' word-chars "$WORDCHARS"
 	;;
 
-	(s*)
+	([sS]*)
 	# shell command arguments or special tokens
-	zstyle ':zle:*' word-style shell
+	wordstyle=shell
 	;;
 
-	(w*)
+	([wW]*)
 	# whitespace-delimited
-	zstyle ':zle:*' word-style space
+	wordstyle=space
 	;;
 
 	(d*)
 	# default: could also return widgets to builtins here
-	zstyle -d ':zle:*' word-style
+	wordstyle=
 	zstyle -d ':zle:*' word-chars
 	;;
 
@@ -84,5 +85,12 @@
 	continue
 	;;
     esac
+
+    if [[ -n $wordstyle ]]; then
+      if [[ $REPLY = [[:upper:]]* ]]; then
+	wordstyle+=-subword
+      fi
+      zstyle ':zle:*' word-style $wordstyle
+    fi
     return
 done


-- 
Peter Stephenson <pws@xxxxxxx>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070

Follow-Ups:
- Re: Camel case word navigation?
  - From: dackz

References:
- Camel case word navigation?
  - From: dackz
- Re: Camel case word navigation?
  - From: Peter Stephenson

Messages sorted by: Reverse Date, Date, Thread, Author