Zsh Mailing List Archive
Messages sorted by:
Reverse Date,
Date,
Thread,
Author
Re: Substitution ${...///} slows down when certain UTF character occurs
This uses the new interface. I haven't done any testing apart from the
normal test suite.
There's one change I still want to make, which is to put the remaining
allocation onto the heap --- it should be used infrequently enough to
make this feasible. However, it's probably worth seeing what the effects
are before that.
There's a reasonable chance there's still some glitch with metafied
characters. We could probably do with some more parameter substitution
tests involving Mikael's interesting characters, possibly more failure
cases, too.
pws
diff --git a/Src/glob.c b/Src/glob.c
index 8bf7352..0594f0a 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -2450,29 +2450,46 @@ matchpat(char *a, char *b)
/* please do not laugh at this code. */
/* Having found a match in getmatch, decide what part of string
- * to return. The matched part starts b characters into string s
- * and finishes e characters in: 0 <= b <= e <= strlen(s)
+ * to return. The matched part starts b characters into string imd->ustr
+ * and finishes e characters in: 0 <= b <= e <= imd->ulen on input
* (yes, empty matches should work).
- * fl is a set of the SUB_* matches defined in zsh.h from SUB_MATCH onwards;
- * the lower parts are ignored.
- * replstr is the replacement string for a substitution
+ *
+ * imd->flags is a set of the SUB_* matches defined in zsh.h from
+ * SUB_MATCH onwards; the lower parts are ignored.
+ *
+ * imd->replstr is the replacement string for a substitution
+ *
+ * imd->replstr is metafied and the values put in imd->repllist are metafied.
*/
/**/
static char *
-get_match_ret(char *s, int b, int e, int fl, char *replstr,
- LinkList repllist)
+get_match_ret(Imatchdata imd, int b, int e)
{
- char buf[80], *r, *p, *rr;
- int ll = 0, l = strlen(s), bl = 0, t = 0, i;
-
+ char buf[80], *r, *p, *rr, *replstr = imd->replstr;
+ int ll = 0, bl = 0, t = 0, add = 0, fl = imd->flags, i;
+
+ /* Account for b and e referring to unmetafied string */
+ for (p = imd->ustr; p < imd->ustr + b; p++)
+ if (imeta(*p))
+ add++;
+ b += add;
+ for (; p < imd->ustr + e; p++)
+ if (imeta(*p))
+ add++;
+ e += add;
+ for (; p < imd->ustr + imd->ulen; p++)
+ if (imeta(*p))
+ add++;
+
+ /* Everything now refers to metafied lengths. */
if (replstr || (fl & SUB_LIST)) {
if (fl & SUB_DOSUBST) {
replstr = dupstring(replstr);
singsub(&replstr);
untokenize(replstr);
}
- if ((fl & (SUB_GLOBAL|SUB_LIST)) && repllist) {
+ if ((fl & (SUB_GLOBAL|SUB_LIST)) && imd->repllist) {
/* We are replacing the chunk, just add this to the list */
Repldata rd = (Repldata)
((fl & SUB_LIST) ? zalloc(sizeof(*rd)) : zhalloc(sizeof(*rd)));
@@ -2480,30 +2497,32 @@ get_match_ret(char *s, int b, int e, int fl, char *replstr,
rd->e = e;
rd->replstr = replstr;
if (fl & SUB_LIST)
- zaddlinknode(repllist, rd);
+ zaddlinknode(imd->repllist, rd);
else
- addlinknode(repllist, rd);
- return s;
+ addlinknode(imd->repllist, rd);
+ return imd->mstr;
}
ll += strlen(replstr);
}
if (fl & SUB_MATCH) /* matched portion */
ll += 1 + (e - b);
if (fl & SUB_REST) /* unmatched portion */
- ll += 1 + (l - (e - b));
+ ll += 1 + (imd->mlen - (e - b));
if (fl & SUB_BIND) {
/* position of start of matched portion */
- sprintf(buf, "%d ", MB_METASTRLEN2END(s, 0, s+b) + 1);
+ sprintf(buf, "%d ", MB_METASTRLEN2END(imd->mstr, 0, imd->mstr+b) + 1);
ll += (bl = strlen(buf));
}
if (fl & SUB_EIND) {
/* position of end of matched portion */
- sprintf(buf + bl, "%d ", MB_METASTRLEN2END(s, 0, s+e) + 1);
+ sprintf(buf + bl, "%d ",
+ MB_METASTRLEN2END(imd->mstr, 0, imd->mstr+e) + 1);
ll += (bl = strlen(buf));
}
if (fl & SUB_LEN) {
/* length of matched portion */
- sprintf(buf + bl, "%d ", MB_METASTRLEN2END(s+b, 0, s+e));
+ sprintf(buf + bl, "%d ", MB_METASTRLEN2END(imd->mstr+b, 0,
+ imd->mstr+e));
ll += (bl = strlen(buf));
}
if (bl)
@@ -2513,7 +2532,7 @@ get_match_ret(char *s, int b, int e, int fl, char *replstr,
if (fl & SUB_MATCH) {
/* copy matched portion to new buffer */
- for (i = b, p = s + b; i < e; i++)
+ for (i = b, p = imd->mstr + b; i < e; i++)
*rr++ = *p++;
t = 1;
}
@@ -2523,12 +2542,12 @@ get_match_ret(char *s, int b, int e, int fl, char *replstr,
if (t)
*rr++ = ' ';
/* there may be unmatched bits at both beginning and end of string */
- for (i = 0, p = s; i < b; i++)
+ for (i = 0, p = imd->mstr; i < b; i++)
*rr++ = *p++;
if (replstr)
for (p = replstr; *p; )
*rr++ = *p++;
- for (i = e, p = s + e; i < l; i++)
+ for (i = e, p = imd->mstr + e; i < imd->mlen; i++)
*rr++ = *p++;
t = 1;
}
@@ -2710,26 +2729,18 @@ set_pat_end(Patprog p, char null_me)
/*
* Increment *tp over character which may be multibyte.
- * Return number of bytes that remain in the character after unmetafication.
+ * Return number of bytes.
+ * All unmetafied here.
*/
/**/
-static int iincchar(char **tp)
+static int iincchar(char **tp, int left)
{
char *t = *tp;
- int mbclen = mb_metacharlenconv(t, NULL);
- int umlen = 0;
-
- while (mbclen--) {
- umlen++;
- if (*t++ == Meta) {
- t++;
- mbclen--;
- }
- }
- *tp = t;
+ int mbclen = mb_charlenconv(t, left, NULL);
+ *tp = t + mbclen;
- return umlen;
+ return mbclen;
}
/**/
@@ -2737,7 +2748,7 @@ static int
igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
LinkList *repllistp)
{
- char *s = *sp, *t, *tmatch;
+ char *s = *sp, *t, *tmatch, *send;
/*
* Note that ioff counts (possibly multibyte) characters in the
* character set (Meta's are not included), while l counts characters in
@@ -2752,36 +2763,52 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*/
int ioff, l = strlen(*sp), matched = 1, umltot = ztrlen(*sp);
int umlen, nmatches;
- /*
- * List of bits of matches to concatenate with replacement string.
- * The data is a struct repldata. It is not used in cases like
- * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match
- * is anchored. It goes on the heap.
- */
- LinkList repllist = NULL;
+ struct patstralloc patstralloc;
+ struct imatchdata imd;
+
+ (void)patallocstr(p, s, l, umltot, 1, &patstralloc);
+ s = patstralloc.alloced;
+ DPUTS(!s, "forced patallocstr failed");
+ send = s + umltot;
+
+ imd.mstr = *sp;
+ imd.mlen = l;
+ imd.ustr = s;
+ imd.ulen = umltot;
+ imd.flags = fl;
+ imd.replstr = replstr;
+ imd.repllist = NULL;
/* perform must-match test for complex closures */
if (p->mustoff)
{
- /*
- * Yuk. Probably we should rewrite this whole function to
- * use an unmetafied test string.
- *
- * Use META_HEAPDUP because we need a terminating NULL.
- */
- char *muststr = metafy((char *)p + p->mustoff,
- p->patmlen, META_HEAPDUP);
+ char *muststr = (char *)p + p->mustoff;
- if (!strstr(s, muststr))
- matched = 0;
+ matched = 0;
+ if (p->patmlen <= umltot)
+ {
+ for (t = s; t <= send - p->patmlen; t++)
+ {
+ if (!memcmp(muststr, t, p->patmlen)) {
+ matched = 1;
+ break;
+ }
+ }
+ }
}
/* in case we used the prog before... */
p->flags &= ~(PAT_NOTSTART|PAT_NOTEND);
if (fl & SUB_ALL) {
- int i = matched && pattrylen(p, s, -1, -1, NULL, 0);
- *sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL);
+ int i = matched && pattrylen(p, s, umltot, 0, &patstralloc, 0);
+ if (!i) {
+ /* Perform under no-match conditions */
+ umltot = 0;
+ imd.replstr = NULL;
+ }
+ *sp = get_match_ret(&imd, 0, umltot);
+ patfreestr(&patstralloc);
if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
return 0;
return 1;
@@ -2809,25 +2836,27 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
* Largest/smallest possible match at head of string.
* First get the longest match...
*/
- if (pattrylen(p, s, -1, -1, NULL, 0)) {
- /* patmatchlen returns metafied length, as we need */
+ if (pattrylen(p, s, umltot, 0, &patstralloc, 0)) {
+ /* patmatchlen returns unmetafied length in this case */
int mlen = patmatchlen();
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
+ send = s + mlen;
/*
* ... now we know whether it's worth looking for the
* shortest, which we do by brute force.
*/
mb_charinit();
- for (t = s, umlen = 0; t < s + mlen; ) {
+ for (t = s, umlen = 0; t < send; ) {
set_pat_end(p, *t);
- if (pattrylen(p, s, t - s, umlen, NULL, 0)) {
+ if (pattrylen(p, s, umlen, 0, &patstralloc, 0)) {
mlen = patmatchlen();
break;
}
- umlen += iincchar(&t);
+ umlen += iincchar(&t, send - t);
}
}
- *sp = get_match_ret(*sp, 0, mlen, fl, replstr, NULL);
+ *sp = get_match_ret(&imd, 0, mlen);
+ patfreestr(&patstralloc);
return 1;
}
break;
@@ -2845,20 +2874,23 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*/
mb_charinit();
tmatch = NULL;
- for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
+ for (ioff = 0, t = s, umlen = umltot; t < send; ioff++) {
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff))
+ if (pattrylen(p, t, umlen, 0, &patstralloc, ioff))
tmatch = t;
if (fl & SUB_START)
break;
- umlen -= iincchar(&t);
+ umlen -= iincchar(&t, send - t);
}
if (tmatch) {
- *sp = get_match_ret(*sp, tmatch - s, l, fl, replstr, NULL);
+ *sp = get_match_ret(&imd, tmatch - s, umltot);
+ patfreestr(&patstralloc);
return 1;
}
- if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, NULL, ioff)) {
- *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+ if (!(fl & SUB_START) && pattrylen(p, s + umltot, 0, 0,
+ &patstralloc, ioff)) {
+ *sp = get_match_ret(&imd, umltot, umltot);
+ patfreestr(&patstralloc);
return 1;
}
break;
@@ -2868,18 +2900,21 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
* move forward along string until we get a match. *
* Again there's no optimisation. */
mb_charinit();
- for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
+ for (ioff = 0, t = s, umlen = umltot; t < send ; ioff++) {
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
- *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL);
+ if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
+ *sp = get_match_ret(&imd, t-s, umltot);
+ patfreestr(&patstralloc);
return 1;
}
if (fl & SUB_START)
break;
- umlen -= iincchar(&t);
+ umlen -= iincchar(&t, send - t);
}
- if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, NULL, ioff)) {
- *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+ if (!(fl & SUB_START) && pattrylen(p, send, 0, 0,
+ &patstralloc, ioff)) {
+ *sp = get_match_ret(&imd, umltot, umltot);
+ patfreestr(&patstralloc);
return 1;
}
break;
@@ -2887,18 +2922,20 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
case SUB_SUBSTR:
/* Smallest at start, but matching substrings. */
set_pat_start(p, l);
- if (!(fl & SUB_GLOBAL) && pattrylen(p, s + l, -1, -1, NULL, 0) &&
+ if (!(fl & SUB_GLOBAL) &&
+ pattrylen(p, send, 0, 0, &patstralloc, 0) &&
!--n) {
- *sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL);
+ *sp = get_match_ret(&imd, 0, 0);
+ patfreestr(&patstralloc);
return 1;
} /* fall through */
case (SUB_SUBSTR|SUB_LONG):
/* longest or smallest at start with substrings */
t = s;
if (fl & SUB_GLOBAL) {
- repllist = (fl & SUB_LIST) ? znewlinklist() : newlinklist();
+ imd.repllist = (fl & SUB_LIST) ? znewlinklist() : newlinklist();
if (repllistp)
- *repllistp = repllist;
+ *repllistp = imd.repllist;
}
ioff = 0; /* offset into string */
umlen = umltot;
@@ -2906,10 +2943,10 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
do {
/* loop over all matches for global substitution */
matched = 0;
- for (; t < s + l; ioff++) {
+ for (; t < send; ioff++) {
/* Find the longest match from this position. */
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
+ if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
char *mpos = t + patmatchlen();
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
char *ptr;
@@ -2923,19 +2960,18 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*/
for (ptr = t, umlen2 = 0; ptr < mpos;) {
set_pat_end(p, *ptr);
- if (pattrylen(p, t, ptr - t, umlen2,
- NULL, ioff)) {
+ if (pattrylen(p, t, umlen2, 0,
+ &patstralloc, ioff)) {
mpos = t + patmatchlen();
break;
}
- umlen2 += iincchar(&ptr);
+ umlen2 += iincchar(&ptr, mpos - ptr);
}
}
if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) {
- *sp = get_match_ret(*sp, t-s, mpos-s, fl,
- replstr, repllist);
+ *sp = get_match_ret(&imd, t-s, mpos-s);
if (mpos == t)
- mpos += mb_metacharlenconv(mpos, NULL);
+ mpos += mb_charlenconv(mpos, send - mpos, NULL);
}
if (!(fl & SUB_GLOBAL)) {
if (n) {
@@ -2945,9 +2981,10 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
* the next character, even if it overlaps
* with what we just found.
*/
- umlen -= iincchar(&t);
+ umlen -= iincchar(&t, send - t);
continue;
} else {
+ patfreestr(&patstralloc);
return 1;
}
}
@@ -2958,11 +2995,11 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
matched = 1;
while (t < mpos) {
ioff++;
- umlen -= iincchar(&t);
+ umlen -= iincchar(&t, send - t);
}
break;
}
- umlen -= iincchar(&t);
+ umlen -= iincchar(&t, send - t);
}
} while (matched);
/*
@@ -2972,8 +3009,9 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*/
set_pat_start(p, l);
if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
- pattrylen(p, s + l, -1, -1, NULL, 0) && !--n) {
- *sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist);
+ pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
+ *sp = get_match_ret(&imd, 0, 0);
+ patfreestr(&patstralloc);
return 1;
}
break;
@@ -2983,8 +3021,10 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
/* Longest/shortest at end, matching substrings. */
if (!(fl & SUB_LONG)) {
set_pat_start(p, l);
- if (pattrylen(p, s + l, 0, 0, NULL, umltot) && !--n) {
- *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+ if (pattrylen(p, send, 0, 0, &patstralloc, umltot) &&
+ !--n) {
+ *sp = get_match_ret(&imd, umltot, umltot);
+ patfreestr(&patstralloc);
return 1;
}
}
@@ -3001,13 +3041,13 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
nmatches = 0;
tmatch = NULL;
mb_charinit();
- for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
+ for (ioff = 0, t = s, umlen = umltot; t < send; ioff++) {
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
+ if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
nmatches++;
tmatch = t;
}
- umlen -= iincchar(&t);
+ umlen -= iincchar(&t, send - t);
}
if (nmatches) {
char *mpos;
@@ -3017,14 +3057,14 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*/
n = nmatches - n;
mb_charinit();
- for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
+ for (ioff = 0, t = s, umlen = umltot; t < send; ioff++) {
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff) &&
+ if (pattrylen(p, t, umlen, 0, &patstralloc, ioff) &&
!n--) {
tmatch = t;
break;
}
- umlen -= iincchar(&t);
+ umlen -= iincchar(&t, send - t);
}
}
mpos = tmatch + patmatchlen();
@@ -3032,29 +3072,31 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
for (t = tmatch, umlen = 0; t < mpos; ) {
set_pat_end(p, *t);
- if (pattrylen(p, tmatch, t - tmatch, umlen,
- NULL, ioff)) {
+ if (pattrylen(p, tmatch, umlen, 0,
+ &patstralloc, ioff)) {
mpos = tmatch + patmatchlen();
break;
}
- umlen += iincchar(&t);
+ umlen += iincchar(&t, mpos - t);
}
}
- *sp = get_match_ret(*sp, tmatch-s, mpos-s, fl,
- replstr, NULL);
+ *sp = get_match_ret(&imd, tmatch-s, mpos-s);
+ patfreestr(&patstralloc);
return 1;
}
set_pat_start(p, l);
- if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, NULL, umltot) &&
+ if ((fl & SUB_LONG) && pattrylen(p, send, 0, 0,
+ &patstralloc, umltot) &&
!--n) {
- *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+ *sp = get_match_ret(&imd, umltot, umltot);
+ patfreestr(&patstralloc);
return 1;
}
break;
}
}
- if (repllist && nonempty(repllist)) {
+ if (imd.repllist && nonempty(imd.repllist)) {
/* Put all the bits of a global search and replace together. */
LinkNode nd;
Repldata rd;
@@ -3062,10 +3104,15 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
char *ptr, *start;
int i;
+ /*
+ * Use metafied string again.
+ * Results from get_match_ret in repllist are all metafied.
+ */
+ s = *sp;
if (!(fl & SUB_LIST)) {
lleft = 0; /* size of returned string */
- i = 0; /* start of last chunk we got from *sp */
- for (nd = firstnode(repllist); nd; incnode(nd)) {
+ i = 0; /* start of last chunk we got from *sp */
+ for (nd = firstnode(imd.repllist); nd; incnode(nd)) {
rd = (Repldata) getdata(nd);
lleft += rd->b - i; /* previous chunk of *sp */
lleft += strlen(rd->replstr); /* the replaced bit */
@@ -3074,7 +3121,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
lleft += l - i; /* final chunk from *sp */
start = t = zhalloc(lleft+1);
i = 0;
- for (nd = firstnode(repllist); nd; incnode(nd)) {
+ for (nd = firstnode(imd.repllist); nd; incnode(nd)) {
rd = (Repldata) getdata(nd);
memcpy(t, s + i, rd->b - i);
t += rd->b - i;
@@ -3087,13 +3134,19 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
start[lleft] = '\0';
*sp = (char *)start;
}
+ patfreestr(&patstralloc);
return 1;
}
- if (fl & SUB_LIST) /* safety: don't think this can happen */
+ if (fl & SUB_LIST) { /* safety: don't think this can happen */
+ patfreestr(&patstralloc);
return 0;
+ }
/* munge the whole string: no match, so no replstr */
- *sp = get_match_ret(*sp, 0, 0, fl, 0, 0);
+ imd.replstr = NULL;
+ imd.repllist = NULL;
+ *sp = get_match_ret(&imd, 0, 0);
+ patfreestr(&patstralloc);
return (fl & SUB_RETFAIL) ? 0 : 1;
}
@@ -3111,7 +3164,7 @@ static int
igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
LinkList *repllistp)
{
- char *s = *sp, *t;
+ char *s = *sp, *t, *send;
/*
* Note that ioff and uml count characters in the character
* set (Meta's are not included), while l counts characters in the
@@ -3119,36 +3172,48 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
* lengths.
*/
int ioff, l = strlen(*sp), uml = ztrlen(*sp), matched = 1, umlen;
- /*
- * List of bits of matches to concatenate with replacement string.
- * The data is a struct repldata. It is not used in cases like
- * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match
- * is anchored. It goes on the heap.
- */
- LinkList repllist = NULL;
+ struct patstralloc patstralloc;
+ struct imatchdata imd;
+
+ (void)patallocstr(p, s, l, uml, 1, &patstralloc);
+ s = patstralloc.alloced;
+ DPUTS(!s, "forced patallocstr failed");
+ send = s + uml;
+
+ imd.mstr = *sp;
+ imd.mlen = l;
+ imd.ustr = s;
+ imd.ulen = uml;
+ imd.flags = fl;
+ imd.replstr = replstr;
+ imd.repllist = NULL;
/* perform must-match test for complex closures */
if (p->mustoff)
{
- /*
- * Yuk. Probably we should rewrite this whole function to
- * use an unmetafied test string.
- *
- * Use META_HEAPDUP because we need a terminating NULL.
- */
- char *muststr = metafy((char *)p + p->mustoff,
- p->patmlen, META_HEAPDUP);
+ char *muststr = (char *)p + p->mustoff;
- if (!strstr(s, muststr))
- matched = 0;
+ matched = 0;
+ if (p->patmlen <= uml)
+ {
+ for (t = s; t <= send - p->patmlen; t++)
+ {
+ if (!memcmp(muststr, t, p->patmlen)) {
+ matched = 1;
+ break;
+ }
+ }
+ }
}
/* in case we used the prog before... */
p->flags &= ~(PAT_NOTSTART|PAT_NOTEND);
if (fl & SUB_ALL) {
- int i = matched && pattry(p, s);
- *sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL);
+ int i = matched && pattrylen(p, s, uml, 0, &patstralloc, 0);
+ if (!i)
+ imd.replstr = NULL;
+ *sp = get_match_ret(&imd, 0, i ? l : 0);
if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
return 0;
return 1;
@@ -3161,23 +3226,25 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
* Largest/smallest possible match at head of string.
* First get the longest match...
*/
- if (pattry(p, s)) {
+ if (pattrylen(p, s, uml, 0, &patstralloc, 0)) {
/* patmatchlen returns metafied length, as we need */
int mlen = patmatchlen();
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
+ send = s + mlen;
/*
* ... now we know whether it's worth looking for the
* shortest, which we do by brute force.
*/
for (t = s, umlen = 0; t < s + mlen; METAINC(t), umlen++) {
set_pat_end(p, *t);
- if (pattrylen(p, s, t - s, umlen, NULL, 0)) {
+ if (pattrylen(p, s, umlen, 0, &patstralloc, 0)) {
mlen = patmatchlen();
break;
}
}
}
- *sp = get_match_ret(*sp, 0, mlen, fl, replstr, NULL);
+ *sp = get_match_ret(&imd, 0, mlen);
+ patfreestr(&patstralloc);
return 1;
}
break;
@@ -3186,17 +3253,13 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
/* Smallest possible match at tail of string: *
* move back down string until we get a match. *
* There's no optimization here. */
- for (ioff = uml, t = s + l, umlen = 0; t >= s;
+ for (ioff = uml, t = send, umlen = 0; t >= s;
t--, ioff--, umlen++) {
- if (t > s && t[-1] == Meta)
- t--;
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
- *sp = get_match_ret(*sp, t - s, l, fl, replstr, NULL);
+ if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
+ *sp = get_match_ret(&imd, t - s, uml);
return 1;
}
- if (t > s+1 && t[-2] == Meta)
- t--;
}
break;
@@ -3204,61 +3267,59 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
/* Largest possible match at tail of string: *
* move forward along string until we get a match. *
* Again there's no optimisation. */
- for (ioff = 0, t = s, umlen = uml; t < s + l;
- ioff++, METAINC(t), umlen--) {
+ for (ioff = 0, t = s, umlen = uml; t < send;
+ ioff++, t++, umlen--) {
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
- *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL);
+ if (pattrylen(p, t, send - t, umlen, &patstralloc, ioff)) {
+ *sp = get_match_ret(&imd, t-s, uml);
return 1;
}
- if (*t == Meta)
- t++;
}
break;
case SUB_SUBSTR:
/* Smallest at start, but matching substrings. */
set_pat_start(p, l);
- if (!(fl & SUB_GLOBAL) && pattry(p, s + l) && !--n) {
- *sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL);
+ if (!(fl & SUB_GLOBAL) &&
+ pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
+ *sp = get_match_ret(&imd, 0, 0);
return 1;
} /* fall through */
case (SUB_SUBSTR|SUB_LONG):
/* longest or smallest at start with substrings */
t = s;
if (fl & SUB_GLOBAL) {
- repllist = newlinklist();
+ imd.repllist = newlinklist();
if (repllistp)
- *repllistp = repllist;
+ *repllistp = imd.repllist;
}
ioff = 0; /* offset into string */
umlen = uml;
do {
/* loop over all matches for global substitution */
matched = 0;
- for (; t < s + l; METAINC(t), ioff++, umlen--) {
+ for (; t < send; t++, ioff++, umlen--) {
/* Find the longest match from this position. */
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
+ if (pattrylen(p, t, send - t, umlen, &patstralloc, ioff)) {
char *mpos = t + patmatchlen();
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
char *ptr;
int umlen2;
for (ptr = t, umlen2 = 0; ptr < mpos;
- METAINC(ptr), umlen2++) {
+ ptr++, umlen2++) {
set_pat_end(p, *ptr);
if (pattrylen(p, t, ptr - t, umlen2,
- NULL, ioff)) {
+ &patstralloc, ioff)) {
mpos = t + patmatchlen();
break;
}
}
}
if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) {
- *sp = get_match_ret(*sp, t-s, mpos-s, fl,
- replstr, repllist);
+ *sp = get_match_ret(&imd, t-s, mpos-s);
if (mpos == t)
- METAINC(mpos);
+ mpos++;
}
if (!(fl & SUB_GLOBAL)) {
if (n) {
@@ -3278,13 +3339,13 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
* which is already marked for replacement.
*/
matched = 1;
- for ( ; t < mpos; t++, ioff++, umlen--)
- if (*t == Meta)
- t++;
+ while (t < mpos) {
+ ioff++;
+ umlen--;
+ t++;
+ }
break;
}
- if (*t == Meta)
- t++;
}
} while (matched);
/*
@@ -3294,8 +3355,9 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*/
set_pat_start(p, l);
if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
- pattry(p, s + l) && !--n) {
- *sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist);
+ pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
+ *sp = get_match_ret(&imd, 0, 0);
+ patfreestr(&patstralloc);
return 1;
}
break;
@@ -3305,47 +3367,50 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
/* Longest/shortest at end, matching substrings. */
if (!(fl & SUB_LONG)) {
set_pat_start(p, l);
- if (pattrylen(p, s + l, 0, 0, NULL, uml) && !--n) {
- *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+ if (pattrylen(p, send, 0, 0, &patstralloc, uml) && !--n) {
+ *sp = get_match_ret(&imd, uml, uml);
+ patfreestr(&patstralloc);
return 1;
}
}
- for (ioff = uml - 1, t = s + l - 1, umlen = 1; t >= s;
+ for (ioff = uml - 1, t = send - 1, umlen = 1; t >= s;
t--, ioff--, umlen++) {
- if (t > s && t[-1] == Meta)
- t--;
set_pat_start(p, t-s);
- if (pattrylen(p, t, s + l - t, umlen, NULL, ioff) && !--n) {
+ if (pattrylen(p, t, send - t, umlen, &patstralloc, ioff) &&
+ !--n) {
/* Found the longest match */
char *mpos = t + patmatchlen();
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
char *ptr;
int umlen2;
for (ptr = t, umlen2 = 0; ptr < mpos;
- METAINC(ptr), umlen2++) {
+ ptr++, umlen2++) {
set_pat_end(p, *ptr);
- if (pattrylen(p, t, ptr - t, umlen2, NULL, ioff)) {
+ if (pattrylen(p, t, umlen2, 0, &patstralloc,
+ ioff)) {
mpos = t + patmatchlen();
break;
}
}
}
- *sp = get_match_ret(*sp, t-s, mpos-s, fl,
- replstr, NULL);
+ *sp = get_match_ret(&imd, t-s, mpos-s);
+ patfreestr(&patstralloc);
return 1;
}
}
set_pat_start(p, l);
- if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, NULL, uml) &&
+ if ((fl & SUB_LONG) && pattrylen(p, send, 0, 0,
+ &patstralloc, uml) &&
!--n) {
- *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+ *sp = get_match_ret(&imd, uml, uml);
+ patfreestr(&patstralloc);
return 1;
}
break;
}
}
- if (repllist && nonempty(repllist)) {
+ if (imd.repllist && nonempty(imd.repllist)) {
/* Put all the bits of a global search and replace together. */
LinkNode nd;
Repldata rd;
@@ -3353,8 +3418,13 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
char *ptr, *start;
int i;
+ /*
+ * Use metafied string again.
+ * Results from get_match_ret in repllist are all metafied.
+ */
+ s = *sp;
i = 0; /* start of last chunk we got from *sp */
- for (nd = firstnode(repllist); nd; incnode(nd)) {
+ for (nd = firstnode(imd.repllist); nd; incnode(nd)) {
rd = (Repldata) getdata(nd);
lleft += rd->b - i; /* previous chunk of *sp */
lleft += strlen(rd->replstr); /* the replaced bit */
@@ -3363,7 +3433,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
lleft += l - i; /* final chunk from *sp */
start = t = zhalloc(lleft+1);
i = 0;
- for (nd = firstnode(repllist); nd; incnode(nd)) {
+ for (nd = firstnode(imd.repllist); nd; incnode(nd)) {
rd = (Repldata) getdata(nd);
memcpy(t, s + i, rd->b - i);
t += rd->b - i;
@@ -3375,11 +3445,15 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
memcpy(t, s + i, l - i);
start[lleft] = '\0';
*sp = (char *)start;
+ patfreestr(&patstralloc);
return 1;
}
/* munge the whole string: no match, so no replstr */
- *sp = get_match_ret(*sp, 0, 0, fl, 0, 0);
+ imd.replstr = NULL;
+ imd.repllist = NULL;
+ *sp = get_match_ret(&imd, 0, 0);
+ patfreestr(&patstralloc);
return 1;
}
diff --git a/Src/pattern.c b/Src/pattern.c
index 03ba37d..8de372c 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -2204,7 +2204,10 @@ pattrylen(Patprog prog, char *string, int len, int unmetalen,
* the pattern module) at which we are trying to match.
* This is added in to the positions recorded in patbeginp and patendp
* when we are looking for substrings. Currently this only happens
- * in the parameter substitution code.
+ * in the parameter substitution code. It refers to a real character
+ * offset, i.e. is already in the form ready for presentation to the
+ * general public --- this is necessary as we don't have the
+ * information to convert it down here.
*
* Note this is a character offset, i.e. a single possibly metafied and
* possibly multibyte character counts as 1.
@@ -2292,7 +2295,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
*/
if (!patstralloc->progstrunmeta)
{
- patstralloc->progstrunmeta = dupstring(progstr);
+ patstralloc->progstrunmeta =
+ dupstrpfx(progstr, (int)prog->patmlen);
unmetafy(patstralloc->progstrunmeta,
&patstralloc->progstrunmetalen);
}
@@ -2346,7 +2350,7 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
* In the orignal structure, but it might be unmetafied
* for use with an unmetafied test string.
*/
- patinlen = (int)prog->patmlen;
+ patinlen = pstrlen;
/* if matching files, must update globbing flags */
patglobflags = prog->globend;
@@ -2360,7 +2364,7 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
* Unmetafied: pstrlen contains unmetafied
* length in bytes.
*/
- str = metafy(patinstart, pstrlen, META_ALLOC);
+ str = metafy(patinstart, pstrlen, META_DUP);
mlen = CHARSUB(patinstart, patinstart + pstrlen);
} else {
str = ztrduppfx(patinstart, patinlen);
@@ -2454,8 +2458,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
/*
* Optimization: if we didn't find any Meta characters
* to begin with, we don't need to look for them now.
- * Only do this if we did the unmetfication internally,
- * since otherwise it's too hard to work out.
+ *
+ * For patstralloc passed in, we want the unmetafied length.
*/
if (patstralloc == &patstralloc_struct &&
patstralloc->unmetalen != origlen) {
@@ -2588,7 +2592,9 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
/*
* Return length of previous succesful match. This is
- * in metafied bytes, i.e. includes a count of Meta characters.
+ * in metafied bytes, i.e. includes a count of Meta characters,
+ * unless the match was done on an unmetafied string using
+ * a patstralloc struct, in which case it, too, is unmetafied.
* Unusual and futile attempt at modular encapsulation.
*/
diff --git a/Src/zsh.h b/Src/zsh.h
index 32f2e0c..15fa5e4 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -480,6 +480,7 @@ typedef struct heap *Heap;
typedef struct heapstack *Heapstack;
typedef struct histent *Histent;
typedef struct hookdef *Hookdef;
+typedef struct imatchdata *Imatchdata;
typedef struct jobfile *Jobfile;
typedef struct job *Job;
typedef struct linkedmod *Linkedmod;
@@ -1593,6 +1594,31 @@ typedef struct zpc_disables_save *Zpc_disables_save;
/* Range: token followed by the (possibly multibyte) start and end */
#define PP_RANGE 21
+/*
+ * Argument to get_match_ret() in glob.c
+ */
+struct imatchdata {
+ /* Metafied trial string */
+ char *mstr;
+ /* Its length */
+ int mlen;
+ /* Unmetafied string */
+ char *ustr;
+ /* Its length */
+ int ulen;
+ /* Flags (SUB_*) */
+ int flags;
+ /* Replacement string (metafied) */
+ char *replstr;
+ /*
+ * List of bits of matches to concatenate with replacement string.
+ * The data is a struct repldata. It is not used in cases like
+ * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match
+ * is anchored. It goes on the heap.
+ */
+ LinkList repllist;
+};
+
/* Globbing flags: lower 8 bits gives approx count */
#define GF_LCMATCHUC 0x0100
#define GF_IGNCASE 0x0200
Messages sorted by:
Reverse Date,
Date,
Thread,
Author