Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: zsh-3.1.2-zefram4 negated patterns



Here's my effort at getting all the globbing tests to work (plus a
couple of others which were giving problems).  As usual, it would
probably be nicer to start from scratch, but without that I don't see
any much simpler way than the following, which treats all exclusions
by backtracking: that's because a pattern like (*~foo) or (^foo) in a
pattern might originally match `foo', then have to backtrack to match
just the `fo'; in general both the thing before the ~ and the
expression in which this subexpression is embedded can be arbitrarily
complicated and backtracking is the only certain way of getting it
right.

This means that even simple command-line things `ls ^*.o' now do this,
so they are a bit slower.  If this is noticeable, I could probably
optimise ^'s somewhat when they are not inside a closure.  In the
meantime, I have instead optimised non-final *'s, which explains
references to C_STAR and STARP in the code.  Instead of laboriously
calling doesmatch() to get each new character, it now simply builds a
list for backtracking in one go.  This isn't a fundamental part of the
new code, but it's not very big either.  I'd be happy to provide a
patch without that in.

*** Misc/globtests.excl	Tue Apr  7 01:02:16 1998
--- Misc/globtests	Thu Apr 23 13:58:59 1998
***************
*** 91,95 ****
--- 91,99 ----
  f zoot          (^z*|*x)
  t foox          (^z*|*x)
  t zoox          (^z*|*x)
+ t foo           (^foo)#
+ f foob          (^foo)b*
+ t foobb         (^foo)b*
+ f zsh           ^z*
  EOT
  print "$failed tests failed."
*** Misc/globtests.ksh.excl	Tue Apr  7 01:02:15 1998
--- Misc/globtests.ksh	Thu Apr 23 13:35:16 1998
***************
*** 84,88 ****
--- 84,91 ----
  f zoot          @(!(z*)|*x)
  t foox          @(!(z*)|*x)
  t zoox          @(!(z*)|*x)
+ t foo           *(!(foo))
+ f foob          !(foo)b*
+ t foobb         !(foo)b*
  EOT
  print "$failed tests failed."
*** Src/glob.c.excl	Wed Apr  8 22:46:52 1998
--- Src/glob.c	Fri Apr 24 14:15:41 1998
***************
*** 108,122 ****
  #define C_ONEHASH	1
  #define C_TWOHASH	2
  #define C_OPTIONAL	4
! #define C_CLOSURE	(C_ONEHASH|C_TWOHASH|C_OPTIONAL)
! #define C_LAST		8
! #define C_PATHADD	16
  
  /* Test macros for the above */
  #define CLOSUREP(c)	(c->stat & C_CLOSURE)
! #define ONEHASHP(c)	(c->stat & C_ONEHASH)
  #define TWOHASHP(c)	(c->stat & C_TWOHASH)
  #define OPTIONALP(c)	(c->stat & C_OPTIONAL)
  #define LASTP(c)	(c->stat & C_LAST)
  #define PATHADDP(c)	(c->stat & C_PATHADD)
  
--- 108,124 ----
  #define C_ONEHASH	1
  #define C_TWOHASH	2
  #define C_OPTIONAL	4
! #define C_STAR		8    
! #define C_CLOSURE	(C_ONEHASH|C_TWOHASH|C_OPTIONAL|C_STAR)
! #define C_LAST		16
! #define C_PATHADD	32
  
  /* Test macros for the above */
  #define CLOSUREP(c)	(c->stat & C_CLOSURE)
! #define ONEHASHP(c)	(c->stat & (C_ONEHASH|C_STAR))
  #define TWOHASHP(c)	(c->stat & C_TWOHASH)
  #define OPTIONALP(c)	(c->stat & C_OPTIONAL)
+ #define STARP(c)	(c->stat & C_STAR)
  #define LASTP(c)	(c->stat & C_LAST)
  #define PATHADDP(c)	(c->stat & C_PATHADD)
  
***************
*** 465,474 ****
  	 */
  	if (*pptr == Hat && isset(EXTENDEDGLOB)) {
  	    /* negate remaining pattern */
! 	    pptr++;
  	    c->str = dupstrpfx(cstr, pptr - cstr);
! 	    if (!(c->next = parsecomp(gflag)))
  		return NULL;
  	    return c;
  	}
  
--- 467,489 ----
  	 */
  	if (*pptr == Hat && isset(EXTENDEDGLOB)) {
  	    /* negate remaining pattern */
! 	    Comp stail = tail;
! 	    tail = NULL;
  	    c->str = dupstrpfx(cstr, pptr - cstr);
! 	    pptr++;
! 
! 	    c1 = (Comp) alloc(sizeof *c1);
! 	    c1->stat |= C_STAR;
! 
! 	    c2 = (Comp) alloc(sizeof *c2);
! 	    if (!(c2->exclude = parsecomp(gflag)))
  		return NULL;
+ 	    if (!*pptr || *pptr == '/')
+ 		c2->stat |= C_LAST;
+ 	    c2->left = c1;
+ 	    c2->next = stail;
+ 	    c->next = c2;
+ 	    tail = stail;
  	    return c;
  	}
  
***************
*** 535,541 ****
  	    if (!(c1 = parsecomp(gflag)))
  		return NULL;
  	    /* ...remembering what comes after it... */
! 	    tail = dpnd ? NULL : c1;
  	    /* ...before going back and parsing inside the group. */
  	    endp = pptr;
  	    pptr = startp;
--- 550,556 ----
  	    if (!(c1 = parsecomp(gflag)))
  		return NULL;
  	    /* ...remembering what comes after it... */
! 	    tail = (dpnd || kshfunc == KF_NOT) ? NULL : c1;
  	    /* ...before going back and parsing inside the group. */
  	    endp = pptr;
  	    pptr = startp;
***************
*** 543,558 ****
  	    pptr++;
  	    c2 = (Comp) alloc(sizeof *c);
  	    c->next = c2;
! 	    c2->next = dpnd ? c1 : (Comp) alloc(sizeof *c);
  	    if (!(c2->left = parsecompsw(0)))
  		return NULL;
  	    if (kshfunc == KF_NOT) {
  		/* we'd actually rather it didn't match.  Instead, match *
  		 * a star and put the parsed pattern into exclude.       */
  		Comp c3 = (Comp) alloc(sizeof *c3);
! 		*(c3->str = dupstring("?")) = Quest;
! 		c3->stat |= C_ONEHASH;
! 		c3->next = tail;
  
  		c2->exclude = c2->left;
  		c2->left = c3;
--- 558,572 ----
  	    pptr++;
  	    c2 = (Comp) alloc(sizeof *c);
  	    c->next = c2;
! 	    c2->next = (dpnd || kshfunc == KF_NOT) ?
! 		c1 : (Comp) alloc(sizeof *c);
  	    if (!(c2->left = parsecompsw(0)))
  		return NULL;
  	    if (kshfunc == KF_NOT) {
  		/* we'd actually rather it didn't match.  Instead, match *
  		 * a star and put the parsed pattern into exclude.       */
  		Comp c3 = (Comp) alloc(sizeof *c3);
! 		c3->stat |= C_STAR;
  
  		c2->exclude = c2->left;
  		c2->left = c3;
***************
*** 569,583 ****
  	    (unset(EXTENDEDGLOB) || !(gflag & GF_TOPLEV) ||
  	     pptr[1] != Tilde || !pptr[2] || pptr[2] == Bar ||
  	     pptr[2] == Outpar) && (mode || pptr[1] != '/')) {
! 	    /* Star followed by other patterns is treated like a closure
! 	     * (zero or more repetitions) of the single character pattern
! 	     * operator `?'.
  	     */
  	    c->str = dupstrpfx(cstr, pptr - cstr);
  	    pptr++;
  	    c1 = (Comp) alloc(sizeof *c1);
! 	    *(c1->str = dupstring("?")) = Quest;
! 	    c1->stat |= C_ONEHASH;
  	    if (!(c2 = parsecomp(gflag)))
  		return NULL;
  	    c1->next = c2;
--- 583,595 ----
  	    (unset(EXTENDEDGLOB) || !(gflag & GF_TOPLEV) ||
  	     pptr[1] != Tilde || !pptr[2] || pptr[2] == Bar ||
  	     pptr[2] == Outpar) && (mode || pptr[1] != '/')) {
! 	    /* Star followed by other patterns is now treated as a special
! 	     * type of closure in doesmatch().
  	     */
  	    c->str = dupstrpfx(cstr, pptr - cstr);
  	    pptr++;
  	    c1 = (Comp) alloc(sizeof *c1);
! 	    c1->stat |= C_STAR;
  	    if (!(c2 = parsecomp(gflag)))
  		return NULL;
  	    c1->next = c2;
***************
*** 646,652 ****
  static Comp
  parsecompsw(int gflag)
  {
!     Comp c1, c2, c3, excl = NULL;
  
      c1 = parsecomp(gflag);
      if (!c1)
--- 658,686 ----
  static Comp
  parsecompsw(int gflag)
  {
!     Comp c1, c2, c3, excl = NULL, stail = tail;
!     char *sptr;
! 
!     /*
!      * Check for a tilde in the expression.  We need to know this in 
!      * advance so as to be able to treat the whole a~b expression by
!      * backtracking:  see exclusion code in doesmatch().
!      */
!     if (isset(EXTENDEDGLOB)) {
! 	int pct = 0;
! 	for (sptr = pptr; *sptr; sptr++) {
! 	    if (*sptr == Inpar)
! 		pct++;
! 	    else if (*sptr == Outpar && --pct < 0)
! 		break;
! 	    else if (*sptr == Bar && !pct)
! 		break;
! 	    else if (*sptr == Tilde && !pct) {
! 		tail = NULL;
! 		break;
! 	    }
! 	}
!     }
  
      c1 = parsecomp(gflag);
      if (!c1)
***************
*** 662,667 ****
--- 696,702 ----
  	if (!excl)
  	    return NULL;
      }
+     tail = stail;
      if (*pptr == Bar || excl) {
  	/* found an alternative or something to exclude */
  	c2 = (Comp) alloc(sizeof *c2);
***************
*** 680,686 ****
  	c2->str = dupstring("");
  	c2->left = c1;
  	c2->right = c3;
! 	c2->exclude = excl;
  	if (gflag & GF_PATHADD)
  	    c2->stat |= C_PATHADD;
  	return c2;
--- 715,722 ----
  	c2->str = dupstring("");
  	c2->left = c1;
  	c2->right = c3;
! 	if ((c2->exclude = excl))
! 	    c2->next = stail;
  	if (gflag & GF_PATHADD)
  	    c2->stat |= C_PATHADD;
  	return c2;
***************
*** 1983,1988 ****
--- 2019,2026 ----
  	pptr = eptr;
  	ret = doesmatch(c->exclude);
      }
+     if (*pptr)
+ 	ret = 0;
  
      pptr = saves;
      first = savei;
***************
*** 2016,2032 ****
      char *opptr = pptr;
  
      while (*pptr) {
! 	char *saves = pptr;
! 	if (OPTIONALP(c) && *pdone >= 1)
! 	    return;
! 	if (!matchonce(c) || saves == pptr ||
! 	    trystring[pptr-opptr]) {
! 	    pptr = saves;
! 	    break;
  	}
- 	gcnode = (Gclose)zalloc(sizeof(struct gclose));
- 	gcnode->start = saves;
- 	gcnode->end = pptr;
  	pushnode(closlist, gcnode);
  	(*pdone)++;
      }
--- 2054,2078 ----
      char *opptr = pptr;
  
      while (*pptr) {
! 	if (STARP(c)) {
! 	    if (trystring[(pptr+1)-opptr])
! 		break;
! 	    gcnode = (Gclose)zalloc(sizeof(struct gclose));
! 	    gcnode->start = pptr;
! 	    gcnode->end = ++pptr;
! 	} else {
! 	    char *saves = pptr;
! 	    if (OPTIONALP(c) && *pdone >= 1)
! 		return;
! 	    if (!matchonce(c) || saves == pptr ||
! 		trystring[pptr-opptr]) {
! 		pptr = saves;
! 		break;
! 	    }
! 	    gcnode = (Gclose)zalloc(sizeof(struct gclose));
! 	    gcnode->start = saves;
! 	    gcnode->end = pptr;
  	}
  	pushnode(closlist, gcnode);
  	(*pdone)++;
      }
***************
*** 2057,2067 ****
  	     */
  	    char looka;
  
! 	    if (*c->str == Quest && !c->str[1] && c->next &&
  		!c->next->left && (looka = *c->next->str) &&
  		!itok(looka)) {
  		/* Another simple optimisation for a very common case:
! 		 * we are processing a * (i.e. ?#) and there is
  		 * an ordinary character match next.  We look ahead for
  		 * that character, taking care of Meta bytes.
  		 */
--- 2103,2113 ----
  	     */
  	    char looka;
  
! 	    if (STARP(c) && c->next &&
  		!c->next->left && (looka = *c->next->str) &&
  		!itok(looka)) {
  		/* Another simple optimisation for a very common case:
! 		 * we are processing a * and there is
  		 * an ordinary character match next.  We look ahead for
  		 * that character, taking care of Meta bytes.
  		 */
***************
*** 2090,2096 ****
  			return 0;
  		    pptr = saves;
  		    first = 0;
! 		    if (!matchonce(c) || pptr == saves)
  			return 0;
  		}
  	    }
--- 2136,2146 ----
  			return 0;
  		    pptr = saves;
  		    first = 0;
! 		    if (STARP(c)) {
! 			if (!*pptr)
! 			    return 0;
! 			pptr++;
! 		    } else if (!matchonce(c) || pptr == saves)
  			return 0;
  		}
  	    }
***************
*** 2182,2190 ****
  	    /* Loop over alternatives with exclusions: (foo~bar|...). *
  	     * Exclusions apply to the pattern in c->left.            */
  	    if (c->left || c->right) {
! 		int ret, ret2;
! 		ret = doesmatch(c->left) &&
! 		    !(c->exclude && excluded(c, saves));
  		ret2 = 0;
  		if (c->right && (!ret || inclosure)) {
  		    /* If in a closure, we always want the longest match. */
--- 2232,2286 ----
  	    /* Loop over alternatives with exclusions: (foo~bar|...). *
  	     * Exclusions apply to the pattern in c->left.            */
  	    if (c->left || c->right) {
! 		int ret = 0, ret2 = 0;
! 		if (c->exclude) {
! 		    char *exclend = 0;
! 
! 		    /* We may need to back up on things like `(*~foo)'
! 		     * if the `*' matched `foo' but needs to match `fo'.
! 		     * exclend marks the end of the shortened text.  We
! 		     * need to restore it to match the tail.
! 		     * We set `inclosure' because we need the more
! 		     * sophisticated code in doesmatch() for any nested
! 		     * closures.
! 		     */
! 		    inclosure++;
! 
! 		    while (!exclend || exclend >= pptr) {
! 			char exclsav = 0;
! 			if (exclend) {
! 			     exclsav = *exclend;
! 			    *exclend = '\0';
! 			 }
! 			if ((ret = doesmatch(c->left))) {
! 			    if (exclend)
! 				*exclend = exclsav;
! 			    exclsav = *(exclend = pptr);
! 			    *exclend = '\0';
! 			    ret2 = !excluded(c, saves);
! 			}
! 			if (exclend)
! 			    *exclend = exclsav;
! 
! 			if (!ret)
! 			    break;
! 			if ((ret = ret2 &&
! 			     ((!c->next && (!LASTP(c) || !*pptr))
! 			      || (c->next && doesmatch(c->next)))) ||
! 			    (!c->next && LASTP(c)))
! 			    break;
! 			/* Back up if necessary: exclend gives the position
! 			 * of the end of the match we are excluding,
! 			 * so only try to match to there.
! 			 */
! 			exclend--;
! 			pptr = saves;
! 		    }
! 		    inclosure--;
! 		    if (ret)
! 			return 1;
! 		} else
! 		    ret = doesmatch(c->left);
  		ret2 = 0;
  		if (c->right && (!ret || inclosure)) {
  		    /* If in a closure, we always want the longest match. */
***************
*** 2227,2234 ****
  	    pat++;
  	    continue;
  	}
- 	if (*pat == Hat)	/* following pattern is negated */
- 	    return 1 - doesmatch(c->next);
  	if (*pat == Inbrack) {
  	    /* Match groups of characters */
  #define PAT(X) (pat[X] == Meta ? pat[(X)+1] ^ 32 : untok(pat[X]))
--- 2323,2328 ----

-- 
Peter Stephenson <pws@xxxxxx>       Tel: +39 50 844536
WWW:  http://www.ifh.de/~pws/
Gruppo Teorico, Dipartimento di Fisica
Piazza Torricelli 2, 56100 Pisa, Italy



Messages sorted by: Reverse Date, Date, Thread, Author