Zsh Mailing List Archive
Messages sorted by:
Reverse Date,
Date,
Thread,
Author
Re: [^ax-y] doesn't work but [^x-ya] does
On Sat, 4 Mar 2017 15:21:43 -0800
Bart Schaefer <schaefer@xxxxxxxxxxxxxxxx> wrote:
> On Mar 2, 11:53am, Peter Stephenson wrote:
> }
> } Here's the brute force approach --- it passes all tests, but I bet
> } there's more fall out to come, and it's all over the place, so I'm not
> } sure if there might be a better way.
>
> Maybe the better way is a macro along the lines of those in ztype.h?
> Perhaps idash(X) ? Doesn't change the number of places in the code
> that have to be touched, but perhaps cleaner.
A bit neater; I've avoided it looking too much like the itype macros as it's
a bit different, explicitly looking for either the raw or tokenized form
rather than a type. But I suppose it would be straightforward to mark
both with a bit; I don't think there's a huge difference either way.
I may just commit this and see how it works.
pws
diff --git a/Src/cond.c b/Src/cond.c
index 8ab0193..9b739f6 100644
--- a/Src/cond.c
+++ b/Src/cond.c
@@ -138,13 +138,13 @@ evalcond(Estate state, char *fromtest)
strs = arrdup(sbuf);
l = 2;
}
- if (name && name[0] == '-')
+ if (name && IS_DASH(name[0]))
errname = name;
- else if (strs[0] && *strs[0] == '-')
+ else if (strs[0] && IS_DASH(*strs[0]))
errname = strs[0];
else
errname = "<null>";
- if (name && name[0] == '-' &&
+ if (name && IS_DASH(name[0]) &&
(cd = getconddef((ctype == COND_MODI), name + 1, 1))) {
if (ctype == COND_MOD &&
(l < cd->min || (cd->max >= 0 && l > cd->max))) {
@@ -171,7 +171,7 @@ evalcond(Estate state, char *fromtest)
strs[0] = dupstring(name);
name = s;
- if (name && name[0] == '-' &&
+ if (name && IS_DASH(name[0]) &&
(cd = getconddef(0, name + 1, 1))) {
if (l < cd->min || (cd->max >= 0 && l > cd->max)) {
zwarnnam(fromtest, "unknown condition: %s",
diff --git a/Src/exec.c b/Src/exec.c
index 83d1513..3a8c268 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -2779,9 +2779,10 @@ execcmd_exec(Estate state, Execcmd_params eparams,
char *argdata = (char *) getdata(argnode);
char *cmdopt;
int has_p = 0, has_vV = 0, has_other = 0;
- while (*argdata == '-') {
+ while (IS_DASH(*argdata)) {
/* Just to be definite, stop on single "-", too, */
- if (!argdata[1] || (argdata[1] == '-' && !argdata[2]))
+ if (!argdata[1] ||
+ (IS_DASH(argdata[1]) && !argdata[2]))
break;
for (cmdopt = argdata+1; *cmdopt; cmdopt++) {
switch (*cmdopt) {
@@ -2835,7 +2836,7 @@ execcmd_exec(Estate state, Execcmd_params eparams,
* as if this is command [non-option-stuff]. This
* isn't a good place for standard option handling.
*/
- if (!strcmp(argdata, "--"))
+ if (IS_DASH(argdata[0]) && IS_DASH(argdata[1]) && !argdata[2])
uremnode(args, firstnode(args));
}
if ((cflags & BINF_EXEC) && nextnode(firstnode(args))) {
@@ -2855,7 +2856,7 @@ execcmd_exec(Estate state, Execcmd_params eparams,
* people aren't likely to mix the option style
* with the zsh style.
*/
- while (next && *next == '-' && strlen(next) >= 2) {
+ while (next && IS_DASH(*next) && strlen(next) >= 2) {
if (!firstnode(args)) {
zerr("exec requires a command to execute");
lastval = 1;
@@ -2863,7 +2864,7 @@ execcmd_exec(Estate state, Execcmd_params eparams,
goto done;
}
uremnode(args, firstnode(args));
- if (!strcmp(next, "--"))
+ if (IS_DASH(next[0]) && IS_DASH(next[1]) && !next[2])
break;
for (cmdopt = &next[1]; *cmdopt; ++cmdopt) {
switch (*cmdopt) {
diff --git a/Src/glob.c b/Src/glob.c
index ff6b258..87127e1 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -1314,6 +1314,7 @@ zglob(LinkList list, LinkNode np, int nountok)
sense ^= 1;
break;
case '-':
+ case Dash:
/* Toggle matching of symbolic links */
sense ^= 2;
break;
@@ -1608,7 +1609,7 @@ zglob(LinkList list, LinkNode np, int nountok)
++s;
}
/* See if it's greater than, equal to, or less than */
- if ((g_range = *s == '+' ? 1 : *s == '-' ? -1 : 0))
+ if ((g_range = *s == '+' ? 1 : IS_DASH(*s) ? -1 : 0))
++s;
data = qgetnum(&s);
break;
@@ -2025,13 +2026,13 @@ hasbraces(char *str)
if (bracechardots(str-1, NULL, NULL))
return 1;
lbr = str - 1;
- if (*str == '-')
+ if (IS_DASH(*str))
str++;
while (idigit(*str))
str++;
if (*str == '.' && str[1] == '.') {
str++; str++;
- if (*str == '-')
+ if (IS_DASH(*str))
str++;
while (idigit(*str))
str++;
@@ -2040,7 +2041,7 @@ hasbraces(char *str)
return 1;
else if (*str == '.' && str[1] == '.') {
str++; str++;
- if (*str == '-')
+ if (IS_DASH(*str))
str++;
while (idigit(*str))
str++;
@@ -2123,7 +2124,7 @@ xpandredir(struct redir *fn, LinkList redirtab)
fn->name = s;
untokenize(s);
if (fn->type == REDIR_MERGEIN || fn->type == REDIR_MERGEOUT) {
- if (s[0] == '-' && !s[1])
+ if (IS_DASH(s[0]) && !s[1])
fn->type = REDIR_CLOSE;
else if (s[0] == 'p' && !s[1])
fn->fd2 = -2;
@@ -2329,12 +2330,14 @@ xpandbraces(LinkList list, LinkNode *np)
* str+1 is the first number in the range, dots+2 the last,
* and dots2+2 is the increment if that's given. */
/* TODO: sorry about this */
- int minw = (str[1] == '0' || (str[1] == '-' && str[2] == '0'))
+ int minw = (str[1] == '0' ||
+ (IS_DASH(str[1]) && str[2] == '0'))
? wid1
- : (dots[2] == '0' || (dots[2] == '-' && dots[3] == '0'))
+ : (dots[2] == '0' ||
+ (IS_DASH(dots[2]) && dots[3] == '0'))
? wid2
: (dots2 && (dots2[2] == '0' ||
- (dots2[2] == '-' && dots2[3] == '0')))
+ (IS_DASH(dots2[2]) && dots2[3] == '0')))
? wid3
: 0;
if (rincr < 0) {
@@ -2392,7 +2395,7 @@ xpandbraces(LinkList list, LinkNode *np)
c2 = ztokens[c2 - STOUC(Pound)];
if ((char) c2 == Meta)
c2 = 32 ^ p[1];
- if (c1 == '-' && lastch >= 0 && p < str2 && lastch <= (int)c2) {
+ if (IS_DASH(c1) && lastch >= 0 && p < str2 && lastch <= (int)c2) {
while (lastch < (int)c2)
ccl[lastch++] = 1;
lastch = -1;
@@ -3528,7 +3531,7 @@ zshtokenize(char *s, int flags)
}
t = s;
while (idigit(*++s));
- if (*s != '-')
+ if (!IS_DASH(*s))
goto cont;
while (idigit(*++s));
if (*s != '>')
diff --git a/Src/lex.c b/Src/lex.c
index 8896128..59e9d14 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -1359,17 +1359,13 @@ gettokstr(int c, int sub)
case LX2_DASH:
/*
* - shouldn't be treated as a special character unless
- * we're in a pattern. Howeve,simply counting "[" doesn't
- * work as []a-z] is a valid expression and we don't know
- * down here what this "[" is for as $foo[stuff] is valid
- * in zsh. So just detect an opening [, which is enough
- * to turn this into a pattern; the Dash will be harmlessly
- * untokenised if not wanted.
+ * we're in a pattern. Unfortunately, working out for
+ * sure in complicated expressions whether we're in a
+ * pattern is tricky. So we'll make it special and
+ * turn it back any time we don't need it special.
+ * This is not ideal as it's a lot of work.
*/
- if (seen_brct)
- c = Dash;
- else
- c = '-';
+ c = Dash;
break;
case LX2_BANG:
/*
diff --git a/Src/math.c b/Src/math.c
index f19c0ed..f961300 100644
--- a/Src/math.c
+++ b/Src/math.c
@@ -463,7 +463,7 @@ lexconstant(void)
char *nptr;
nptr = ptr;
- if (*nptr == '-')
+ if (IS_DASH(*nptr))
nptr++;
if (*nptr == '0') {
@@ -527,7 +527,7 @@ lexconstant(void)
}
if (*nptr == 'e' || *nptr == 'E') {
nptr++;
- if (*nptr == '+' || *nptr == '-')
+ if (*nptr == '+' || IS_DASH(*nptr))
nptr++;
while (idigit(*nptr) || *nptr == '_')
nptr++;
@@ -599,7 +599,8 @@ zzlex(void)
}
return (unary) ? UPLUS : PLUS;
case '-':
- if (*ptr == '-') {
+ case Dash:
+ if (IS_DASH(*ptr)) {
ptr++;
return (unary) ? PREMINUS : POSTMINUS;
}
diff --git a/Src/parse.c b/Src/parse.c
index 699ea49..6fe283d 100644
--- a/Src/parse.c
+++ b/Src/parse.c
@@ -2317,6 +2317,19 @@ par_cond_1(void)
}
/*
+ * Return 1 if condition matches. This also works for non-elided options.
+ *
+ * input is test string, may begin with - or Dash.
+ * cond is condition following the -.
+ */
+static int check_cond(const char *input, const char *cond)
+{
+ if (!IS_DASH(input[0]))
+ return 0;
+ return !strcmp(input + 1, cond);
+}
+
+/*
* cond_2 : BANG cond_2
| INPAR { SEPER } cond_2 { SEPER } OUTPAR
| STRING STRING STRING
@@ -2342,7 +2355,7 @@ par_cond_2(void)
s1 = tokstr;
condlex();
/* ksh behavior: [ -t ] means [ -t 1 ]; bash disagrees */
- if (unset(POSIXBUILTINS) && !strcmp(s1, "-t"))
+ if (unset(POSIXBUILTINS) && check_cond(s1, "t"))
return par_cond_double(s1, dupstring("1"));
return par_cond_double(dupstring("-n"), s1);
}
@@ -2352,7 +2365,7 @@ par_cond_2(void)
if (!strcmp(*testargs, "=") ||
!strcmp(*testargs, "==") ||
!strcmp(*testargs, "!=") ||
- (**testargs == '-' && get_cond_num(*testargs + 1) >= 0)) {
+ (IS_DASH(**testargs) && get_cond_num(*testargs + 1) >= 0)) {
s1 = tokstr;
condlex();
s2 = tokstr;
@@ -2374,8 +2387,8 @@ par_cond_2(void)
* In "test" compatibility mode, "! -a ..." and "! -o ..."
* are treated as "[string] [and] ..." and "[string] [or] ...".
*/
- if (!(n_testargs > 1 &&
- (!strcmp(*testargs, "-a") || !strcmp(*testargs, "-o"))))
+ if (!(n_testargs > 1 && (check_cond(*testargs, "a") ||
+ check_cond(*testargs, "o"))))
{
condlex();
ecadd(WCB_COND(COND_NOT, 0));
@@ -2397,7 +2410,7 @@ par_cond_2(void)
return r;
}
s1 = tokstr;
- dble = (s1 && *s1 == '-'
+ dble = (s1 && IS_DASH(*s1)
&& (!n_testargs
|| strspn(s1+1, "abcdefghknoprstuvwxzLONGS") == 1)
&& !s1[2]);
@@ -2411,7 +2424,7 @@ par_cond_2(void)
YYERROR(ecused);
}
condlex();
- if (n_testargs == 2 && tok != STRING && tokstr && s1[0] == '-') {
+ if (n_testargs == 2 && tok != STRING && tokstr && IS_DASH(s1[0])) {
/*
* Something like "test -z" followed by a token.
* We'll turn the token into a string (we've also
@@ -2446,9 +2459,9 @@ par_cond_2(void)
} else
YYERROR(ecused);
}
- s2 = tokstr;
+ s2 = tokstr;
if (!n_testargs)
- dble = (s2 && *s2 == '-' && !s2[2]);
+ dble = (s2 && IS_DASH(*s2) && !s2[2]);
incond++; /* parentheses do globbing */
do condlex(); while (COND_SEP());
incond--; /* parentheses do grouping */
@@ -2476,7 +2489,7 @@ par_cond_2(void)
static int
par_cond_double(char *a, char *b)
{
- if (a[0] != '-' || !a[1])
+ if (!IS_DASH(a[0]) || !a[1])
COND_ERROR("parse error: condition expected: %s", a);
else if (!a[2] && strspn(a+1, "abcdefgknoprstuvwxzhLONGS") == 1) {
ecadd(WCB_COND(a[1], 0));
@@ -2534,7 +2547,7 @@ par_cond_triple(char *a, char *b, char *c)
ecadd(WCB_COND(COND_REGEX, 0));
ecstr(a);
ecstr(c);
- } else if (b[0] == '-') {
+ } else if (IS_DASH(b[0])) {
if ((t0 = get_cond_num(b + 1)) > -1) {
ecadd(WCB_COND(t0 + COND_NT, 0));
ecstr(a);
@@ -2545,7 +2558,7 @@ par_cond_triple(char *a, char *b, char *c)
ecstr(a);
ecstr(c);
}
- } else if (a[0] == '-' && a[1]) {
+ } else if (IS_DASH(a[0]) && a[1]) {
ecadd(WCB_COND(COND_MOD, 2));
ecstr(a);
ecstr(b);
@@ -2560,7 +2573,7 @@ par_cond_triple(char *a, char *b, char *c)
static int
par_cond_multi(char *a, LinkList l)
{
- if (a[0] != '-' || !a[1])
+ if (!IS_DASH(a[0]) || !a[1])
COND_ERROR("condition expected: %s", a);
else {
LinkNode n;
@@ -3256,10 +3269,10 @@ build_dump(char *nam, char *dump, char **files, int ali, int map, int flags)
for (hlen = FD_PRELEN, tlen = 0; *files; files++) {
struct stat st;
- if (!strcmp(*files, "-k")) {
+ if (check_cond(*files, "k")) {
flags = (flags & ~(FDHF_KSHLOAD | FDHF_ZSHLOAD)) | FDHF_KSHLOAD;
continue;
- } else if (!strcmp(*files, "-z")) {
+ } else if (check_cond(*files, "z")) {
flags = (flags & ~(FDHF_KSHLOAD | FDHF_ZSHLOAD)) | FDHF_ZSHLOAD;
continue;
}
diff --git a/Src/pattern.c b/Src/pattern.c
index 928790f..75db016 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -1521,7 +1521,7 @@ patcomppiece(int *flagp, int paren)
patparse = nptr;
len |= 1;
}
- DPUTS(*patparse != '-', "BUG: - missing from numeric glob");
+ DPUTS(!IS_DASH(*patparse), "BUG: - missing from numeric glob");
patparse++;
if (idigit(*patparse)) {
to = (zrange_t) zstrtol((char *)patparse,
diff --git a/Src/subst.c b/Src/subst.c
index 02dbe28..2214b3d 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -481,6 +481,8 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep,
for ( ; *x; x += l) {
int rawc = -1;
convchar_t c;
+ if (*x == Dash)
+ *x = '-';
if (itok(STOUC(*x))) {
/* token, can't be separator, must be single byte */
rawc = *x;
@@ -1766,7 +1768,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
*/
c = *s;
if (itype_end(s, IIDENT, 1) == s && *s != '#' && c != Pound &&
- c != '-' && c != '!' && c != '$' && c != String && c != Qstring &&
+ !IS_DASH(c) &&
+ c != '!' && c != '$' && c != String && c != Qstring &&
c != '?' && c != Quest &&
c != '*' && c != Star && c != '@' && c != '{' &&
c != Inbrace && c != '=' && c != Equals && c != Hat &&
@@ -1895,13 +1898,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
if (quotetype == QT_DOLLARS ||
quotetype == QT_BACKSLASH_PATTERN)
goto flagerr;
- if (s[1] == '-' || s[1] == '+') {
+ if (IS_DASH(s[1]) || s[1] == '+') {
if (quotemod)
goto flagerr;
s++;
quotemod = 1;
- quotetype = (*s == '-') ? QT_SINGLE_OPTIONAL :
- QT_QUOTEDZPUTS;
+ quotetype = (*s == '+') ? QT_QUOTEDZPUTS :
+ QT_SINGLE_OPTIONAL;
} else {
if (quotetype == QT_SINGLE_OPTIONAL) {
/* extra q's after '-' not allowed */
@@ -2208,9 +2211,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
* properly in the first place we wouldn't
* have this nonsense.
*/
- || ((cc == '#' || cc == Pound) &&
- s[2] == Outbrace)
- || cc == '-' || (cc == ':' && s[2] == '-')
+ || ((cc == '#' || cc == Pound) && s[2] == Outbrace)
+ || IS_DASH(cc)
+ || (cc == ':' && IS_DASH(s[2]))
|| (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) {
getlen = 1 + whichlen, s++;
/*
@@ -2605,14 +2608,17 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
* Again, this duplicates tests for characters we're about to
* examine properly later on.
*/
- if (inbrace &&
- (c = *s) != '-' && c != '+' && c != ':' && c != '%' && c != '/' &&
- c != '=' && c != Equals &&
- c != '#' && c != Pound &&
- c != '?' && c != Quest &&
- c != '}' && c != Outbrace) {
- zerr("bad substitution");
- return NULL;
+ if (inbrace) {
+ c = *s;
+ if (!IS_DASH(c) &&
+ c != '+' && c != ':' && c != '%' && c != '/' &&
+ c != '=' && c != Equals &&
+ c != '#' && c != Pound &&
+ c != '?' && c != Quest &&
+ c != '}' && c != Outbrace) {
+ zerr("bad substitution");
+ return NULL;
+ }
}
/*
* Join arrays up if we're in quotes and there isn't some
@@ -2690,8 +2696,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
/* Check for ${..?..} or ${..=..} or one of those. *
* Only works if the name is in braces. */
- if (inbrace && ((c = *s) == '-' ||
- c == '+' ||
+ if (inbrace && ((c = *s) == '+' ||
+ IS_DASH(c) ||
c == ':' || /* i.e. a doubled colon */
c == '=' || c == Equals ||
c == '%' ||
@@ -2802,6 +2808,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
vunset = 1;
/* Fall Through! */
case '-':
+ case Dash:
if (vunset) {
int split_flags;
val = dupstring(s);
diff --git a/Src/utils.c b/Src/utils.c
index 7f3ddad..9669944 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -2376,7 +2376,7 @@ zstrtol_underscore(const char *s, char **t, int base, int underscore)
while (inblank(*s))
s++;
- if ((neg = (*s == '-')))
+ if ((neg = IS_DASH(*s)))
s++;
else if (*s == '+')
s++;
@@ -6118,7 +6118,9 @@ quotedzputs(char const *s, FILE *stream)
} else
*ptr++ = '\'';
while(*s) {
- if (*s == Meta)
+ if (*s == Dash)
+ c = '-';
+ else if (*s == Meta)
c = *++s ^ 32;
else
c = *s;
@@ -6155,7 +6157,9 @@ quotedzputs(char const *s, FILE *stream)
} else {
/* use Bourne-style quoting, avoiding empty quoted strings */
while (*s) {
- if (*s == Meta)
+ if (*s == Dash)
+ c = '-';
+ else if (*s == Meta)
c = *++s ^ 32;
else
c = *s;
diff --git a/Src/zsh.h b/Src/zsh.h
index f2c2790..ce41b17 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -238,6 +238,16 @@ struct mathfunc {
#define PATCHARS "#^*()|[]<>?~\\"
/*
+ * Check for a possibly tokenized dash.
+ *
+ * A dash only needs to be a token in a character range, [a-z], but
+ * it's difficult in general to ensure that. So it's turned into
+ * a token at the usual point in the lexer. However, we need
+ * to check for a literal dash at many points.
+ */
+#define IS_DASH(x) ((x) == '-' || (x) == Dash)
+
+/*
* Types of quote. This is used in various places, so care needs
* to be taken when changing them. (Oooh, don't you look surprised.)
* - Passed to quotestring() to indicate style. This is the ultimate
diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst
index 1385d57..413381f 100644
--- a/Test/D02glob.ztst
+++ b/Test/D02glob.ztst
@@ -686,3 +686,9 @@
rm glob.tmp/link
0:modifier ':P' resolves symlinks before '..' components
*>*glob.tmp/hello/world
+
+ foo=a
+ value="ac"
+ print ${value//[${foo}b-z]/x}
+0:handling of - range in complicated pattern context
+>xx
Messages sorted by:
Reverse Date,
Date,
Thread,
Author