Zsh Mailing List Archive
Messages sorted by:
Reverse Date,
Date,
Thread,
Author
Re: Strange behavior of [[
The change from mb_metacharinit() to mb_charinit() is a bit unsightly
but the name had got just plain confusing --- there's nothing meta about
it. I never metacharacter I couldn't parse.
(No multibyte characters were harmed in the preparation of this email;
I've used $'\ua0'.)
pws
diff --git a/Src/Modules/curses.c b/Src/Modules/curses.c
index 41ad2c6..62dbd55 100644
--- a/Src/Modules/curses.c
+++ b/Src/Modules/curses.c
@@ -765,7 +765,7 @@ zccmd_string(const char *nam, char **args)
w = (ZCWin)getdata(node);
#ifdef HAVE_WADDWSTR
- mb_metacharinit();
+ mb_charinit();
wptr = wstr = zhalloc((strlen(str)+1) * sizeof(wchar_t));
while (*str && (clen = mb_metacharlenconv(str, &wc))) {
diff --git a/Src/Modules/regex.c b/Src/Modules/regex.c
index ce57de9..94f523f 100644
--- a/Src/Modules/regex.c
+++ b/Src/Modules/regex.c
@@ -115,6 +115,7 @@ zcond_regex_match(char **a, int id)
} else {
zlong offs;
char *ptr;
+ int clen, leftlen;
m = matches;
s = metafy(lhstr + m->rm_so, m->rm_eo - m->rm_so, META_DUP);
@@ -123,19 +124,25 @@ zcond_regex_match(char **a, int id)
* Count the characters before the match.
*/
ptr = lhstr;
+ leftlen = m->rm_so;
offs = 0;
- MB_METACHARINIT();
- while (ptr < lhstr + m->rm_so) {
+ MB_CHARINIT();
+ while (leftlen) {
offs++;
- ptr += MB_METACHARLEN(ptr);
+ clen = MB_CHARLEN(ptr, leftlen);
+ ptr += clen;
+ leftlen -= clen;
}
setiparam("MBEGIN", offs + !isset(KSHARRAYS));
/*
* Add on the characters in the match.
*/
- while (ptr < lhstr + m->rm_eo) {
+ leftlen = m->rm_eo - m->rm_so;
+ while (leftlen) {
offs++;
- ptr += MB_METACHARLEN(ptr);
+ clen = MB_CHARLEN(ptr, leftlen);
+ ptr += clen;
+ leftlen -= clen;
}
setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
if (nelem) {
@@ -149,19 +156,25 @@ zcond_regex_match(char **a, int id)
{
char buf[DIGBUFSIZE];
ptr = lhstr;
+ leftlen = m->rm_so;
offs = 0;
/* Find the start offset */
- MB_METACHARINIT();
- while (ptr < lhstr + m->rm_so) {
+ MB_CHARINIT();
+ while (leftlen) {
offs++;
- ptr += MB_METACHARLEN(ptr);
+ clen = MB_CHARLEN(ptr, leftlen);
+ ptr += clen;
+ leftlen -= clen;
}
convbase(buf, offs + !isset(KSHARRAYS), 10);
*bptr = ztrdup(buf);
/* Continue to the end offset */
- while (ptr < lhstr + m->rm_eo) {
+ leftlen = m->rm_eo - m->rm_so;
+ while (leftlen ) {
offs++;
- ptr += MB_METACHARLEN(ptr);
+ clen = MB_CHARLEN(ptr, leftlen);
+ ptr += clen;
+ leftlen -= clen;
}
convbase(buf, offs + !isset(KSHARRAYS) - 1, 10);
*eptr = ztrdup(buf);
diff --git a/Src/Zle/complist.c b/Src/Zle/complist.c
index f542066..a02a5c3 100644
--- a/Src/Zle/complist.c
+++ b/Src/Zle/complist.c
@@ -728,7 +728,7 @@ clnicezputs(int do_colors, char *s, int ml)
if (do_colors)
initiscol();
- mb_metacharinit();
+ mb_charinit();
while (umleft > 0) {
size_t cnt = eol ? MB_INVALID : mbrtowc(&cc, uptr, umleft, &mbs);
diff --git a/Src/Zle/zle_utils.c b/Src/Zle/zle_utils.c
index e4ab97a..06e4581 100644
--- a/Src/Zle/zle_utils.c
+++ b/Src/Zle/zle_utils.c
@@ -1288,7 +1288,7 @@ showmsg(char const *msg)
p = unmetafy(umsg, &ulen);
memset(&mbs, 0, sizeof mbs);
- mb_metacharinit();
+ mb_charinit();
while (ulen > 0) {
char const *n;
if (*p == '\n') {
diff --git a/Src/builtin.c b/Src/builtin.c
index a3d847f..0edc070 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -4582,7 +4582,7 @@ bin_print(char *name, char **args, Options ops, int func)
convchar_t cc;
#ifdef MULTIBYTE_SUPPORT
if (isset(MULTIBYTE)) {
- mb_metacharinit();
+ mb_charinit();
(void)mb_metacharlenconv(metafy(curarg+1, curlen-1,
META_USEHEAP), &cc);
}
@@ -5557,7 +5557,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
wint_t wi;
if (isset(MULTIBYTE)) {
- mb_metacharinit();
+ mb_charinit();
(void)mb_metacharlenconv(delimstr, &wi);
}
else
diff --git a/Src/glob.c b/Src/glob.c
index 057d44a..eff34a2 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -2237,7 +2237,7 @@ xpandbraces(LinkList list, LinkNode *np)
#ifdef MULTIBYTE_SUPPORT
char *ncptr;
int nclen;
- mb_metacharinit();
+ mb_charinit();
ncptr = wcs_nicechar(cend, NULL, NULL);
nclen = strlen(ncptr);
p = zhalloc(lenalloc + nclen);
@@ -2805,7 +2805,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
* ... now we know whether it's worth looking for the
* shortest, which we do by brute force.
*/
- mb_metacharinit();
+ mb_charinit();
for (t = s, umlen = 0; t < s + mlen; ) {
set_pat_end(p, *t);
if (pattrylen(p, s, t - s, umlen, 0)) {
@@ -2831,7 +2831,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
* so that match, mbegin, mend and MATCH, MBEGIN, MEND are
* correct.
*/
- mb_metacharinit();
+ mb_charinit();
tmatch = NULL;
for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
set_pat_start(p, t-s);
@@ -2855,7 +2855,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
/* Largest possible match at tail of string: *
* move forward along string until we get a match. *
* Again there's no optimisation. */
- mb_metacharinit();
+ mb_charinit();
for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
set_pat_start(p, t-s);
if (pattrylen(p, t, s + l - t, umlen, ioff)) {
@@ -2889,7 +2889,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
}
ioff = 0; /* offset into string */
umlen = umltot;
- mb_metacharinit();
+ mb_charinit();
do {
/* loop over all matches for global substitution */
matched = 0;
@@ -2986,7 +2986,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
*/
nmatches = 0;
tmatch = NULL;
- mb_metacharinit();
+ mb_charinit();
for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
set_pat_start(p, t-s);
if (pattrylen(p, t, s + l - t, umlen, ioff)) {
@@ -3002,7 +3002,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
* We need to find the n'th last match.
*/
n = nmatches - n;
- mb_metacharinit();
+ mb_charinit();
for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
set_pat_start(p, t-s);
if (pattrylen(p, t, s + l - t, umlen, ioff) &&
diff --git a/Src/hist.c b/Src/hist.c
index bd03c4f..6725313 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -2000,7 +2000,7 @@ casemodify(char *str, int how)
VARARR(char, mbstr, MB_CUR_MAX);
mbstate_t ps;
- mb_metacharinit();
+ mb_charinit();
memset(&ps, 0, sizeof(ps));
while (*str) {
wint_t wc;
diff --git a/Src/prompt.c b/Src/prompt.c
index ffc1d0d..9e8589d 100644
--- a/Src/prompt.c
+++ b/Src/prompt.c
@@ -964,7 +964,7 @@ stradd(char *d)
/* FALL THROUGH */
default:
/* Take full wide character in one go */
- mb_metacharinit();
+ mb_charinit();
pc = wcs_nicechar(cc, NULL, NULL);
break;
}
diff --git a/Src/utils.c b/Src/utils.c
index c33c16d..13fc96a 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -82,7 +82,7 @@ set_widearray(char *mb_array, Widechar_array wca)
wchar_t *wcptr = tmpwcs;
wint_t wci;
- mb_metacharinit();
+ mb_charinit();
while (*mb_array) {
int mblen = mb_metacharlenconv(mb_array, &wci);
@@ -332,7 +332,7 @@ zerrmsg(FILE *file, const char *fmt, va_list ap)
case 'c':
num = va_arg(ap, int);
#ifdef MULTIBYTE_SUPPORT
- mb_metacharinit();
+ mb_charinit();
zputs(wcs_nicechar(num, NULL, NULL), file);
#else
zputs(nicechar(num), file);
@@ -461,12 +461,13 @@ static mbstate_t mb_shiftstate;
/*
* Initialise multibyte state: called before a sequence of
- * wcs_nicechar() or mb_metacharlenconv().
+ * wcs_nicechar(), mb_metacharlenconv(), or
+ * mb_charlenconv().
*/
/**/
mod_export void
-mb_metacharinit(void)
+mb_charinit(void)
{
memset(&mb_shiftstate, 0, sizeof(mb_shiftstate));
}
@@ -500,7 +501,7 @@ mb_metacharinit(void)
* (but not both). (Note the complication that the wide character
* part may contain metafied characters.)
*
- * The caller needs to call mb_metacharinit() before the first call, to
+ * The caller needs to call mb_charinit() before the first call, to
* set up the multibyte shift state for a range of characters.
*/
@@ -3832,7 +3833,7 @@ itype_end(const char *ptr, int itype, int once)
#ifdef MULTIBYTE_SUPPORT
if (isset(MULTIBYTE) &&
(itype != IIDENT || !isset(POSIXIDENTIFIERS))) {
- mb_metacharinit();
+ mb_charinit();
while (*ptr) {
wint_t wc;
int len = mb_metacharlenconv(ptr, &wc);
@@ -4972,6 +4973,65 @@ mb_metastrlenend(char *ptr, int width, char *eptr)
return num + num_in_char;
}
+/*
+ * The equivalent of mb_metacharlenconv_r() for
+ * strings that aren't metafied and hence have
+ * explicit lengths.
+ */
+
+/**/
+mod_export int
+mb_charlenconv_r(const char *s, int slen, wint_t *wcp, mbstate_t *mbsp)
+{
+ size_t ret = MB_INVALID;
+ char inchar;
+ const char *ptr;
+ wchar_t wc;
+
+ for (ptr = s; slen; ) {
+ inchar = *ptr;
+ ptr++;
+ slen--;
+ ret = mbrtowc(&wc, &inchar, 1, mbsp);
+
+ if (ret == MB_INVALID)
+ break;
+ if (ret == MB_INCOMPLETE)
+ continue;
+ if (wcp)
+ *wcp = wc;
+ return ptr - s;
+ }
+
+ if (wcp)
+ *wcp = WEOF;
+ /* No valid multibyte sequence */
+ memset(mbsp, 0, sizeof(*mbsp));
+ if (ptr > s) {
+ return 1; /* Treat as single byte character */
+ } else
+ return 0; /* Probably shouldn't happen */
+}
+
+/*
+ * The equivalent of mb_metacharlenconv() for
+ * strings that aren't metafied and hence have
+ * explicit lengths.
+ */
+
+/**/
+mod_export int
+mb_charlenconv(const char *s, int slen, wint_t *wcp)
+{
+ if (!isset(MULTIBYTE)) {
+ if (wcp)
+ *wcp = (wint_t)*s;
+ return 1;
+ }
+
+ return mb_charlenconv_r(s, slen, wcp, &mb_shiftstate);
+}
+
/**/
#else
@@ -4996,6 +5056,23 @@ metacharlenconv(const char *x, int *c)
return 1;
}
+/* Simple replacement for mb_charlenconv */
+
+/**/
+mod_export int
+charlenconv(const char *x, int len, int *c)
+{
+ if (!len) {
+ if (c)
+ *c = '\0';
+ return 0;
+ }
+
+ if (c)
+ *c = (char)*x;
+ return 1;
+}
+
/**/
#endif /* MULTIBYTE_SUPPORT */
diff --git a/Src/zsh.h b/Src/zsh.h
index c88c2e7..fb04929 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -2921,8 +2921,9 @@ enum {
#define AFTERTRAPHOOK (zshhooks + 2)
#ifdef MULTIBYTE_SUPPORT
+/* Metafied input */
#define nicezputs(str, outs) (void)mb_niceformat((str), (outs), NULL, 0)
-#define MB_METACHARINIT() mb_metacharinit()
+#define MB_METACHARINIT() mb_charinit()
typedef wint_t convchar_t;
#define MB_METACHARLENCONV(str, cp) mb_metacharlenconv((str), (cp))
#define MB_METACHARLEN(str) mb_metacharlenconv(str, NULL)
@@ -2932,6 +2933,11 @@ typedef wint_t convchar_t;
#define MB_METASTRLEN2END(str, widthp, eptr) \
mb_metastrlenend(str, widthp, eptr)
+/* Unmetafied input */
+#define MB_CHARINIT() mb_charinit()
+#define MB_CHARLENCONV(str, len, cp) mb_charlenconv((str), (len), (cp))
+#define MB_CHARLEN(str, len) mb_charlenconv((str), (len), NULL)
+
/*
* We replace broken implementations with one that uses Unicode
* characters directly as wide characters. In principle this is only
@@ -3015,6 +3021,10 @@ typedef int convchar_t;
#define MB_METASTRLEN2(str, widthp) ztrlen(str)
#define MB_METASTRLEN2END(str, widthp, eptr) ztrlenend(str, eptr)
+#define MB_CHARINIT()
+#define MB_CHARLENCONV(str, len, cp) charlenconv((str), (len), (cp))
+#define MB_CHARLEN(str, len) ((len) ? 1 : 0)
+
#define WCWIDTH_WINT(c) (1)
/* Leave character or string as is. */
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index c9ecb78..7fc07cc 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -484,3 +484,16 @@
# This doesn't look aligned in my editor because actually the characters
# aren't quite double width, but the arithmetic is correct.
# It appears just to be an effect of the font.
+
+ if zmodload -i zsh/regex 2>/dev/null; then
+ [[ $'\ua0' =~ '^.$' ]] && print OK
+ [[ $'\ua0' =~ $'^\ua0$' ]] && print OK
+ [[ $'\ua0'X =~ '^X$' ]] || print OK
+ else
+ print -u$ZTST_fd "Regexp test skipped, regexp library not found."
+ print -l OK OK OK
+ fi
+0:Ensure no confusion on metafied input to regex module
+>OK
+>OK
+>OK
Messages sorted by:
Reverse Date,
Date,
Thread,
Author