Zsh Mailing List Archive
Messages sorted by:
Reverse Date,
Date,
Thread,
Author
Re: PATCH: multibyte characters in patterns.
- X-seq: zsh-workers 22476
- From: Wayne Davison <wayned@xxxxxxxxxxxxxxxxxxxxx>
- To: Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx>
- Subject: Re: PATCH: multibyte characters in patterns.
- Date: Tue, 30 May 2006 18:16:30 -0700
- Cc: Zsh hackers list <zsh-workers@xxxxxxxxxx>
- In-reply-to: <200604092139.k39Lcw7P015097@xxxxxxxxxxxxxxxxx>
- Mailing-list: contact zsh-workers-help@xxxxxxxxxx; run by ezmlm
- References: <200604092139.k39Lcw7P015097@xxxxxxxxxxxxxxxxx>
On Sun, Apr 09, 2006 at 10:38:58PM +0100, Peter Stephenson wrote:
> This adds handling for multibyte characters in patterns when the shell
> is compiled with MULTIBYTE_SUPPORT.
FYI, I just checked in a few changes to pattern.c to fix some compiler
warnings about signed/unsigned mismatches. There were two spots that
were still using an "int" where they should have been using the new
patchar_t typedef. One of these was assigning a literal -1 to the
variable, so I added a PEOF define to use either WEOF or EOF for this
value (as the Zle code does -- perhaps we should make a global define
for this in zsh.h?). Finally, I changed the multibyte version of
patchar_t to use wint_t instead of wchar_t (since this is what the
towupper()-style functions return), and in so doing, I renamed the
typedef to be patint_t (since the non-multibyte version is an "int").
Attached is the patch.
..wayne..
--- Src/pattern.c 25 Apr 2006 17:40:26 -0000 1.33
+++ Src/pattern.c 31 May 2006 00:56:56 -0000
@@ -276,7 +276,9 @@ static int patglobflags; /* globbing fl
* Increment pointer to metafied multibyte string.
*/
#ifdef MULTIBYTE_SUPPORT
-typedef wchar_t patchar_t;
+typedef wint_t patint_t;
+
+#define PEOF WEOF
#define METACHARINC(x) ((void)metacharinc(&x))
@@ -345,7 +347,9 @@ metacharinc(char **x)
}
#else
-typedef int patchar_t;
+typedef int patint_t;
+
+#define PEOF EOF
#define METACHARINC(x) ((void)((x) += (*(x) == Meta) ? 2 : 1))
/*
@@ -1689,7 +1693,7 @@ charsub(char *x, char *y)
/* Get a character and increment */
#define CHARREFINC(x, y) (STOUC(*(x)++))
/* Counter the number of characters between two pointers, smaller first */
-#define CHARSUB(x,y) (y-x)
+#define CHARSUB(x,y) ((y) - (x))
#endif /* MULTIBYTE_SUPPORT */
@@ -2171,8 +2175,9 @@ patmatch(Upat prog)
/* Current and next nodes */
Upat scan = prog, next, opnd;
char *start, *save, *chrop, *chrend, *compend;
- int savglobflags, op, no, min, nextch, fail = 0, saverrsfound;
+ int savglobflags, op, no, min, fail = 0, saverrsfound;
zrange_t from, to, comp;
+ patint_t nextch;
while (scan) {
next = PATNEXT(scan);
@@ -2204,8 +2209,8 @@ patmatch(Upat prog)
while (chrop < chrend && patinput < patinend) {
char *savpatinput = patinput;
char *savchrop = chrop;
- patchar_t chin = CHARREFINC(patinput, patinend);
- patchar_t chpa = CHARREFINC(chrop, chrend);
+ patint_t chin = CHARREFINC(patinput, patinend);
+ patint_t chpa = CHARREFINC(chrop, chrend);
if (!CHARMATCH(chin, chpa)) {
fail = 1;
patinput = savpatinput;
@@ -2747,14 +2752,14 @@ patmatch(Upat prog)
}
nextch = CHARREF(nextop, nextop + nextlen);
} else
- nextch = -1;
+ nextch = PEOF;
savglobflags = patglobflags;
saverrsfound = errsfound;
lastcharstart = charstart + (patinput - start);
if (no >= min) {
for (;;) {
- int charmatch_cache;
- if (nextch < 0 ||
+ patint_t charmatch_cache;
+ if (nextch == PEOF ||
(patinput < patinend &&
CHARMATCH_EXPR(CHARREF(patinput, patinend),
nextch))) {
@@ -2862,10 +2867,10 @@ patmatch(Upat prog)
*/
if (save < patinend && nextin < patinend &&
nextexact < exactend) {
- patchar_t cin0 = CHARREF(save, patinend);
- patchar_t cpa0 = CHARREF(exactpos, exactend);
- patchar_t cin1 = CHARREF(nextin, patinend);
- patchar_t cpa1 = CHARREF(nextexact, exactend);
+ patint_t cin0 = CHARREF(save, patinend);
+ patint_t cpa0 = CHARREF(exactpos, exactend);
+ patint_t cin1 = CHARREF(nextin, patinend);
+ patint_t cpa1 = CHARREF(nextexact, exactend);
if (CHARMATCH(cin0, cpa1) &&
CHARMATCH(cin1, cpa0)) {
@@ -3154,7 +3159,7 @@ patmatchrange(char *range, int ch)
static int patrepeat(Upat p, char *charstart)
{
int count = 0;
- patchar_t tch, charmatch_cache;
+ patint_t tch, charmatch_cache;
char *scan, *opnd;
scan = patinput;
Messages sorted by:
Reverse Date,
Date,
Thread,
Author