Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: 3.1.5: case-insensitive globbing via flags



Here is the patch for case-insensitive globbing.  The syntax is (#i) to
turn it on, (#I) to turn it off, and (#l) to make only the lowercase
characters in the pattern match case-insensitively.

Since there was enthusiasm for being able to turn this on for the
whole of a filename path, I took the path of least resistance and
built it in as standard, so you have to turn it off again if you only
want it for an early part of the path (unusual, I would think).
Matching . and .. , which must only ever be matched by a literal
string and never by a pattern, turned out to be tricky, but it now
handles (assuming you're in the Src directory of the zsh source):

(#i)../src/modules    ) ->  ../Src/Modules
..(#i)/src/modules    )
../Src/(#i)../src/modules   ->  ../Src/../Src/Modules

and the only thing which I know doesn't work is .(#i). , which frankly
doesn't worry me.  (This only applies to filenames, of course.)

I got bored with gcc telling me that there were dangling else's, so I
added a few irrelevant braces in glob.c as well.

Sven will recognise about four lines of this patch :-).


*** Doc/Zsh/expn.yo.ci	Mon Nov  2 11:31:26 1998
--- Doc/Zsh/expn.yo	Tue Nov  3 13:32:28 1998
***************
*** 840,845 ****
--- 840,882 ----
  (Like `tt(LPAR()^LPAR())...tt(RPAR()RPAR())'.)
  )
  enditem()
+ subsect(Globbing Flags)
+ There are various flags which affect any text to their right up to the
+ end of the enclosing group or to the end of the pattern; they require
+ the tt(EXTENDED_GLOB) option. All take the form
+ tt(LPAR()#)var(X)tt(RPAR()) where var(X) may be one of the following
+ characters:
+ 
+ startitem()
+ item(i)(
+ Case insensitive:  upper or lower case characters in the pattern match
+ upper or lower case characters.
+ )
+ item(l)(
+ Lower case characters in the pattern match upper or lower case
+ characters; upper case characters in the pattern still only match
+ upper case characters.
+ )
+ item(I)(
+ Case sensitive:  locally negates the effect of tt(i) or tt(l) from
+ that point on.
+ )
+ enditem()
+ 
+ For example, the test string tt(fooxx) can be matched by the pattern
+ tt(LPAR()#i)tt(RPAR()FOOXX), but not by tt(LPAR()#l)tt(RPAR()FOOXX),
+ tt(LPAR()#i)tt(RPAR()FOO)tt(LPAR()#I)tt(RPAR()XX) or
+ tt(LPAR()LPAR()#i)tt(RPAR()FOOX)tt(RPAR()X).
+ 
+ When using the ksh syntax for grouping, both tt(KSH_GLOB) and
+ tt(EXTENDED_GLOB) must be set and the left parenthesis should be
+ preceded by tt(@).  Note also that the flags do not affect letters
+ inside tt([...]) groups, in other words tt(LPAR()#i)tt(RPAR()[a-z])
+ still matches only lowercase letters.  Finally, note that when
+ examining whole paths case-insensitively every directory must be
+ searched for all files which match, so that a pattern of the form
+ tt(LPAR()#i)tt(RPAR()/foo/bar/...) is potentially slow.
+ 
  subsect(Recursive Globbing)
  A pathname component of the form `tt(LPAR())var(foo)tt(/RPAR()#)'
  matches a path consisting of zero or more directories
*** Misc/globtests.ci	Mon Nov  2 14:33:59 1998
--- Misc/globtests	Tue Nov  3 11:35:36 1998
***************
*** 103,107 ****
--- 103,116 ----
  t [             [[]
  t ]             []]
  t []            [^]]]
+ t fooxx         (#i)FOOXX
+ f fooxx         (#l)FOOXX
+ t FOOXX         (#l)fooxx
+ f fooxx         (#i)FOO(#I)X(#i)X
+ t fooXx         (#i)FOO(#I)X(#i)X
+ t fooxx         ((#i)FOOX)x
+ f fooxx         ((#i)FOOX)X
+ f BAR           (bar|(#i)foo)
+ t FOO           (bar|(#i)foo)
  EOT
  print "$failed tests failed."
*** Misc/globtests.ksh.ci	Sat Apr 25 19:09:20 1998
--- Misc/globtests.ksh	Tue Nov  3 11:35:54 1998
***************
*** 1,6 ****
  #!/usr/local/bin/zsh -f
  
! setopt kshglob
  
  failed=0
  while read res str pat; do
--- 1,6 ----
  #!/usr/local/bin/zsh -f
  
! setopt kshglob extendedglob
  
  failed=0
  while read res str pat; do
***************
*** 87,91 ****
--- 87,100 ----
  t foo           *(!(foo))
  f foob          !(foo)b*
  t foobb         !(foo)b*
+ t fooxx         (#i)FOOXX
+ f fooxx         (#l)FOOXX
+ t FOOXX         (#l)fooxx
+ f fooxx         (#i)FOO@(#I)X@(#i)X
+ t fooXx         (#i)FOO@(#I)X@(#i)X
+ t fooxx         @((#i)FOOX)x
+ f fooxx         @((#i)FOOX)X
+ f BAR           @(bar|(#i)foo)
+ t FOO           @(bar|(#i)foo)
  EOT
  print "$failed tests failed."
*** Src/glob.c.ci	Mon Nov  2 11:31:26 1998
--- Src/glob.c	Tue Nov  3 12:11:24 1998
***************
*** 110,115 ****
--- 110,117 ----
  #define C_CLOSURE	(C_ONEHASH|C_TWOHASH|C_OPTIONAL|C_STAR)
  #define C_LAST		16
  #define C_PATHADD	32
+ #define C_LCMATCHUC	64
+ #define C_IGNCASE	128
  
  /* Test macros for the above */
  #define CLOSUREP(c)	(c->stat & C_CLOSURE)
***************
*** 305,319 ****
      if (!q)
  	return;
  
!     if ((closure = q->closure))	/* (foo/)# - match zero or more dirs */
  	if (q->closure == 2)	/* (foo/)## - match one or more dirs */
  	    q->closure = 1;
  	else
  	    scanner(q->next);
      c = q->comp;
      /* Now the actual matching for the current path section. */
!     if (!(c->next || c->left) && !haswilds(c->str)) {
! 	/* It's a straight string to the end of the path section. */
  	int l = strlen(c->str);
  
  	if (l + !l + pathpos - pathbufcwd >= PATH_MAX) {
--- 307,330 ----
      if (!q)
  	return;
  
!     if ((closure = q->closure)) {
! 	/* (foo/)# - match zero or more dirs */
  	if (q->closure == 2)	/* (foo/)## - match one or more dirs */
  	    q->closure = 1;
  	else
  	    scanner(q->next);
+     }
      c = q->comp;
      /* Now the actual matching for the current path section. */
!     if (!(c->next || c->left) && !haswilds(c->str)
! 	&& (!(c->stat & (C_LCMATCHUC|C_IGNCASE))
! 	    || !strcmp(".", c->str) || !strcmp("..", c->str))) {
! 	/*
! 	 * We always need to match . and .. explicitly, even if we're
! 	 * checking other strings for case-insensitive matches.
! 	 *
! 	 * It's a straight string to the end of the path section.
! 	 */
  	int l = strlen(c->str);
  
  	if (l + !l + pathpos - pathbufcwd >= PATH_MAX) {
***************
*** 436,441 ****
--- 447,498 ----
  
  /* Parse a series of path components pointed to by pptr */
  
+ /* Flags to apply to current level of grouping */
+ 
+ static int addflags;
+ 
+ /**/
+ static Comp
+ compalloc(void)
+ {
+     Comp c = (Comp) alloc(sizeof *c);
+     c->stat |= addflags;
+     return c;
+ }
+ 
+ /**/
+ static int
+ getglobflags()
+ {
+     /* (#X): assumes we are still positioned on the initial '(' */
+     pptr++;
+     while (*++pptr && *pptr != Outpar) {
+ 	switch (*pptr) {
+ 	case 'l':
+ 	    /* Lowercase in pattern matches lower or upper in target */
+ 	    addflags |= C_LCMATCHUC;
+ 	    break;
+ 
+ 	case 'i':
+ 	    /* Fully case insensitive */
+ 	    addflags |= C_IGNCASE;
+ 	    break;
+ 
+ 	case 'I':
+ 	    /* Restore case sensitivity */
+ 	    addflags &= ~(C_LCMATCHUC|C_IGNCASE);
+ 	    break;
+ 
+ 	default:
+ 	    return 1;
+ 	}
+     }
+     if (*pptr != Outpar)
+ 	return 1;
+     pptr++;
+     return 0;
+ }
+ 
  /* enum used with ksh-like patterns, @(...) etc. */
  
  enum { KF_NONE, KF_AT, KF_QUEST, KF_STAR, KF_PLUS, KF_NOT };
***************
*** 447,453 ****
  parsecomp(int gflag)
  {
      int kshfunc;
!     Comp c = (Comp) alloc(sizeof *c), c1, c2;
      char *cstr, *ls = NULL;
  
      /* In case of alternatives, code coming up is stored in tail. */
--- 504,510 ----
  parsecomp(int gflag)
  {
      int kshfunc;
!     Comp c = compalloc(), c1, c2;
      char *cstr, *ls = NULL;
  
      /* In case of alternatives, code coming up is stored in tail. */
***************
*** 468,477 ****
  	    c->str = dupstrpfx(cstr, pptr - cstr);
  	    pptr++;
  
! 	    c1 = (Comp) alloc(sizeof *c1);
  	    c1->stat |= C_STAR;
  
! 	    c2 = (Comp) alloc(sizeof *c2);
  	    if (!(c2->exclude = parsecomp(gflag)))
  		return NULL;
  	    if (!*pptr || *pptr == '/')
--- 525,534 ----
  	    c->str = dupstrpfx(cstr, pptr - cstr);
  	    pptr++;
  
! 	    c1 = compalloc();
  	    c1->stat |= C_STAR;
  
! 	    c2 = compalloc();
  	    if (!(c2->exclude = parsecomp(gflag)))
  		return NULL;
  	    if (!*pptr || *pptr == '/')
***************
*** 513,518 ****
--- 570,608 ----
  		pptr++;
  	}
  
+ 	if (*pptr == Inpar && pptr[1] == Pound) {
+ 	    /* Found some globbing flags */
+ 	    char *eptr = pptr;
+ 	    if (kshfunc != KF_NONE)
+ 		eptr--;
+ 	    if (getglobflags())
+ 		return NULL;
+ 	    if (eptr == cstr) {
+ 		/* if no string yet, carry on and get one. */
+ 		c->stat |= addflags;
+ 		cstr = pptr;
+ 		continue;
+ 	    }
+ 	    c->str = dupstrpfx(cstr, eptr - cstr);
+ 	    /*
+ 	     * The next bit simply handles the case where . or ..
+ 	     * is followed by a set of flags, but we need to force
+ 	     * them to be handled as a string.  Hardly worth it.
+ 	     */
+ 	    if (!*pptr || (!mode && *pptr == '/') || *pptr == Bar ||
+ 		(isset(EXTENDEDGLOB) && *pptr == Tilde &&
+ 		 pptr[1] && pptr[1] != Outpar && pptr[1] != Bar) ||
+ 		*pptr == Outpar) {
+ 		if (*pptr == '/' || !*pptr ||
+ 		    (isset(EXTENDEDGLOB) && *pptr == Tilde &&
+ 		     (gflag & GF_TOPLEV)))
+ 		    c->stat |= C_LAST;
+ 		return c;
+ 	    }
+ 	    if (!(c->next = parsecomp(gflag)))
+ 		return NULL;
+ 	    return c;
+ 	}
  	if (*pptr == Inpar) {
  	    /* Found a group (...) */
  	    char *startp = pptr, *endp;
***************
*** 552,567 ****
  	    pptr = startp;
  	    c->str = dupstrpfx(cstr, (pptr - cstr) - (kshfunc != KF_NONE));
  	    pptr++;
! 	    c2 = (Comp) alloc(sizeof *c);
  	    c->next = c2;
  	    c2->next = (dpnd || kshfunc == KF_NOT) ?
! 		c1 : (Comp) alloc(sizeof *c);
  	    if (!(c2->left = parsecompsw(0)))
  		return NULL;
  	    if (kshfunc == KF_NOT) {
  		/* we'd actually rather it didn't match.  Instead, match *
  		 * a star and put the parsed pattern into exclude.       */
! 		Comp c3 = (Comp) alloc(sizeof *c3);
  		c3->stat |= C_STAR;
  
  		c2->exclude = c2->left;
--- 642,657 ----
  	    pptr = startp;
  	    c->str = dupstrpfx(cstr, (pptr - cstr) - (kshfunc != KF_NONE));
  	    pptr++;
! 	    c2 = compalloc();
  	    c->next = c2;
  	    c2->next = (dpnd || kshfunc == KF_NOT) ?
! 		c1 : compalloc();
  	    if (!(c2->left = parsecompsw(0)))
  		return NULL;
  	    if (kshfunc == KF_NOT) {
  		/* we'd actually rather it didn't match.  Instead, match *
  		 * a star and put the parsed pattern into exclude.       */
! 		Comp c3 = compalloc();
  		c3->stat |= C_STAR;
  
  		c2->exclude = c2->left;
***************
*** 584,590 ****
  	     */
  	    c->str = dupstrpfx(cstr, pptr - cstr);
  	    pptr++;
! 	    c1 = (Comp) alloc(sizeof *c1);
  	    c1->stat |= C_STAR;
  	    if (!(c2 = parsecomp(gflag)))
  		return NULL;
--- 674,680 ----
  	     */
  	    c->str = dupstrpfx(cstr, pptr - cstr);
  	    pptr++;
! 	    c1 = compalloc();
  	    c1->stat |= C_STAR;
  	    if (!(c2 = parsecomp(gflag)))
  		return NULL;
***************
*** 596,608 ****
  	    /* repeat whatever we've just had (ls) zero or more times */
  	    if (!ls)
  		return NULL;
! 	    c2 = (Comp) alloc(sizeof *c);
  	    c2->str = dupstrpfx(ls, pptr - ls);
  	    pptr++;
  	    if (*pptr == Pound) {
  		/* need one or more matches: cheat by copying previous char */
  		pptr++;
! 		c->next = c1 = (Comp) alloc(sizeof *c);
  		c1->str = c2->str;
  	    } else
  		c1 = c;
--- 686,698 ----
  	    /* repeat whatever we've just had (ls) zero or more times */
  	    if (!ls)
  		return NULL;
! 	    c2 = compalloc();
  	    c2->str = dupstrpfx(ls, pptr - ls);
  	    pptr++;
  	    if (*pptr == Pound) {
  		/* need one or more matches: cheat by copying previous char */
  		pptr++;
! 		c->next = c1 = compalloc();
  		c1->str = c2->str;
  	    } else
  		c1 = c;
***************
*** 669,674 ****
--- 759,765 ----
  parsecompsw(int gflag)
  {
      Comp c1, c2, c3, excl = NULL, stail = tail;
+     int oaddflags = addflags;
      char *sptr;
  
      /*
***************
*** 709,715 ****
      tail = stail;
      if (*pptr == Bar || excl) {
  	/* found an alternative or something to exclude */
! 	c2 = (Comp) alloc(sizeof *c2);
  	if (*pptr == Bar) {
  	    /* get the next alternative after the | */
  	    pptr++;
--- 800,806 ----
      tail = stail;
      if (*pptr == Bar || excl) {
  	/* found an alternative or something to exclude */
! 	c2 = compalloc();
  	if (*pptr == Bar) {
  	    /* get the next alternative after the | */
  	    pptr++;
***************
*** 728,735 ****
  	    c2->next = stail;
  	if (gflag & GF_PATHADD)
  	    c2->stat |= C_PATHADD;
! 	return c2;
      }
      return c1;
  }
  
--- 819,828 ----
  	    c2->next = stail;
  	if (gflag & GF_PATHADD)
  	    c2->stat |= C_PATHADD;
! 	c1 = c2;
      }
+     if (!(gflag & GF_TOPLEV))
+ 	addflags = oaddflags;
      return c1;
  }
  
***************
*** 758,764 ****
  	    errflag = 1;
  	    return NULL;
  	}
! 	p1->comp = (Comp) alloc(sizeof *p1->comp);
  	p1->comp->stat |= C_LAST;	/* end of path component  */
  	p1->comp->str = dupstring("*");
  	*p1->comp->str = Star;		/* match anything...      */
--- 851,857 ----
  	    errflag = 1;
  	    return NULL;
  	}
! 	p1->comp = compalloc();
  	p1->comp->stat |= C_LAST;	/* end of path component  */
  	p1->comp->str = dupstring("*");
  	*p1->comp->str = Star;		/* match anything...      */
***************
*** 814,821 ****
--- 907,934 ----
  parsepat(char *str)
  {
      mode = 0;			/* path components present */
+     addflags = 0;
      pptr = str;
      tail = NULL;
+     /*
+      * Check for initial globbing flags, so that they don't form
+      * a bogus path component.
+      */
+     if (*pptr == Inpar && pptr[1] == Pound && isset(EXTENDEDGLOB) &&
+ 	getglobflags())
+ 	return NULL;
+ 
+     /* Now there is no (#X) in front, we can check the path. */
+     if (!pathbuf)
+ 	pathbuf = zalloc(pathbufsz = PATH_MAX);
+     DPUTS(pathbufcwd, "BUG: glob changed directory");
+     if (*pptr == '/') {		/* pattern has absolute path */
+ 	pptr++;
+ 	pathbuf[0] = '/';
+ 	pathbuf[pathpos = 1] = '\0';
+     } else			/* pattern is relative to pwd */
+ 	pathbuf[pathpos = 0] = '\0';
+ 
      return parsecomplist();
  }
  
***************
*** 897,903 ****
  	    if (*s == Bar || *s == Outpar ||
  		(isset(EXTENDEDGLOB) && *s == Tilde))
  		break;
! 	if (*s == Inpar) {
  	    /* Real qualifiers found. */
  	    int sense = 0;	/* bit 0 for match (0)/don't match (1)   */
  				/* bit 1 for follow links (2), don't (0) */
--- 1010,1016 ----
  	    if (*s == Bar || *s == Outpar ||
  		(isset(EXTENDEDGLOB) && *s == Tilde))
  		break;
! 	if (*s == Inpar && (!isset(EXTENDEDGLOB) || s[1] != Pound)) {
  	    /* Real qualifiers found. */
  	    int sense = 0;	/* bit 0 for match (0)/don't match (1)   */
  				/* bit 1 for follow links (2), don't (0) */
***************
*** 1234,1248 ****
  	    }
  	}
      }
-     if (!pathbuf)
- 	pathbuf = zalloc(pathbufsz = PATH_MAX);
-     DPUTS(pathbufcwd, "BUG: glob changed directory");
-     if (*str == '/') {		/* pattern has absolute path */
- 	str++;
- 	pathbuf[0] = '/';
- 	pathbuf[pathpos = 1] = '\0';
-     } else			/* pattern is relative to pwd */
- 	pathbuf[pathpos = 0] = '\0';
      q = parsepat(str);
      if (!q || errflag) {	/* if parsing failed */
  	if (unset(BADPATTERN)) {
--- 1347,1352 ----
***************
*** 1267,1273 ****
      /* Deal with failures to match depending on options */
      if (matchct)
  	badcshglob |= 2;	/* at least one cmd. line expansion O.K. */
!     else if (!gf_nullglob)
  	if (isset(CSHNULLGLOB)) {
  	    badcshglob |= 1;	/* at least one cmd. line expansion failed */
  	} else if (isset(NOMATCH)) {
--- 1371,1377 ----
      /* Deal with failures to match depending on options */
      if (matchct)
  	badcshglob |= 2;	/* at least one cmd. line expansion O.K. */
!     else if (!gf_nullglob) {
  	if (isset(CSHNULLGLOB)) {
  	    badcshglob |= 1;	/* at least one cmd. line expansion failed */
  	} else if (isset(NOMATCH)) {
***************
*** 1279,1284 ****
--- 1383,1389 ----
  	    untokenize(*matchptr++ = dupstring(ostr));
  	    matchct = 1;
  	}
+     }
      /* Sort arguments in to lexical (and possibly numeric) order. *
       * This is reversed to facilitate insertion into the list.    */
      qsort((void *) & matchbuf[0], matchct, sizeof(char *),
***************
*** 1373,1383 ****
  		    *str++ = '{', *str = '}';
  		else
  		    bc++;
! 	    } else if (*str == Outbrace)
  		if (!bc)
  		    *str = '}';
  		else if (!--bc)
  		    return 1;
  	return 0;
      }
      /* Otherwise we need to look for... */
--- 1478,1489 ----
  		    *str++ = '{', *str = '}';
  		else
  		    bc++;
! 	    } else if (*str == Outbrace) {
  		if (!bc)
  		    *str = '}';
  		else if (!--bc)
  		    return 1;
+ 	    }
  	return 0;
      }
      /* Otherwise we need to look for... */
***************
*** 1554,1564 ****
  	else if (*str2 == Outbrace) {
  	    if (--bc == 0)
  		break;
! 	} else if (bc == 1)
  	    if (*str2 == Comma)
  		++comma;	/* we have {foo,bar} */
  	    else if (*str2 == '.' && str2[1] == '.')
  		dotdot++;	/* we have {num1..num2} */
      DPUTS(bc, "BUG: unmatched brace in xpandbraces()");
      if (!comma && dotdot) {
  	/* Expand range like 0..10 numerically: comma or recursive
--- 1660,1671 ----
  	else if (*str2 == Outbrace) {
  	    if (--bc == 0)
  		break;
! 	} else if (bc == 1) {
  	    if (*str2 == Comma)
  		++comma;	/* we have {foo,bar} */
  	    else if (*str2 == '.' && str2[1] == '.')
  		dotdot++;	/* we have {num1..num2} */
+ 	}
      DPUTS(bc, "BUG: unmatched brace in xpandbraces()");
      if (!comma && dotdot) {
  	/* Expand range like 0..10 numerically: comma or recursive
***************
*** 2489,2495 ****
  	    }
  	    continue;
  	}
! 	if (*pptr == *pat) {
  	    /* just plain old characters */
  	    pptr++;
  	    pat++;
--- 2596,2605 ----
  	    }
  	    continue;
  	}
! 	if (*pptr == *pat ||
! 	    (((c->stat & C_IGNCASE) ? (tulower(*pat) == tulower(*pptr)) :
! 	      (c->stat & C_LCMATCHUC) ?
! 	      (islower(*pat) && tuupper(*pat) == *pptr) : 0))) {
  	    /* just plain old characters */
  	    pptr++;
  	    pat++;
***************
*** 2508,2513 ****
--- 2618,2624 ----
  {
      remnulargs(str);
      mode = 1;			/* no path components */
+     addflags = 0;
      pptr = str;
      tail = NULL;
      return parsecompsw(GF_TOPLEV);

-- 
Peter Stephenson <pws@xxxxxxxxxxxxxxxxx>       Tel: +39 050 844536
WWW:  http://www.ifh.de/~pws/
Dipartimento di Fisica, Via Buonarotti 2, 56100 Pisa, Italy



Messages sorted by: Reverse Date, Date, Thread, Author