Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Support for locale defined sorting/character range



Hi!

I have made small changes to zsh-3.0-pre2 to support locale-defined sort
order. The changes affect:

character range in [a-z] glob pattern (match if character is in range as
determined by current locale setting).

sorting of globbing results - now they are sorted in order of current
locale.

(Ooi) modifiers in parameter substitution - again, sort in order of
current locale.

I use strcoll(); I have added a test for strcoll to configure.in;
I assume that config.h.in is easily recreated ;)

Note: I put a check whether we allocate from permanent storage or from heap
in cstrpcmp and invcstrpcmp. Probably the check is not needed - I couldn't
trace it far enough.

Of course, it all assumes single-byte character set - no multibytes or
wide characters. It seems enough for now (well, for Europe/USSR anyway;)

thanks for any feedback (I am not on this list - please Cc if replying).

greetings

-------------------------------------------------------------------------
Andrej Borsenkow 		Fax:   +7 (095) 252 01 05
SNI ITS Moscow			Tel:   +7 (095) 252 13 88

NERV:  borsenkow.msk		E-Mail: borsenkow.msk@xxxxxx
-------------------------------------------------------------------------
==================== diffs =============================
--- Src/glob.c.org	Fri Jun 28 17:46:02 1996
+++ Src/glob.c	Tue Jul  9 17:01:18 1996
@@ -610,8 +610,13 @@
     char *c = *b, *d = *a;
     int cmp;
 
+#ifdef HAVE_STRCOLL
+    cmp = strcoll(c, d);
+#endif
     for (; *c == *d && *c; c++, d++);
+#ifndef HAVE_STRCOLL
     cmp = (int)STOUC(*c) - (int)STOUC(*d);
+#endif
     if (isset(NUMERICGLOBSORT) && (idigit(*c) || idigit(*d))) {
 	for (; c > *b && idigit(c[-1]); c--, d--);
 	if (idigit(*c) && idigit(*d)) {
@@ -1709,17 +1714,32 @@
 #define PAT(X) (pat[X] == Meta ? pat[(X)+1] ^ 32 : untok(pat[X]))
 #define PPAT(X) (pat[(X)-1] == Meta ? pat[X] ^ 32 : untok(pat[X]))
 	    char ch;
+#ifdef HAVE_STRCOLL
+	    char l_buf[2], r_buf[2], ch_buf[2];
+
+	    l_buf[1] = r_buf[1] = ch_buf[1] = '\0';
+#endif
 
 	    if (!*pptr)
 		break;
 	    ch = *pptr == Meta ? pptr[1] ^ 32 : *pptr;
+#ifdef HAVE_STRCOLL
+	    ch_buf[0] = ch;
+#endif
 	    if (pat[1] == Hat || pat[1] == '^' || pat[1] == '!') {
 		/* group is negated */
 		pat[1] = Hat;
 		for (pat += 2; *pat != Outbrack && *pat;
 		     *pat == Meta ? pat += 2 : pat++)
 		    if (*pat == '-' && pat[-1] != Hat && pat[1] != Outbrack) {
+#ifdef HAVE_STRCOLL
+			l_buf[0] = PPAT(-1);
+			r_buf[0] = PAT(1);
+			if (strcoll(l_buf, ch_buf) <= 0 &&
+			    strcoll(ch_buf, r_buf) <= 0)
+#else
 			if (PPAT(-1) <= ch && PAT(1) >= ch)
+#endif
 			    break;
 		    } else if (ch == PAT(0))
 			break;
@@ -1740,7 +1760,14 @@
 		     *pat == Meta ? pat += 2 : pat++)
 		    if (*pat == '-' && pat[-1] != Inbrack &&
 			       pat[1] != Outbrack) {
+#ifdef HAVE_STRCOLL
+			l_buf[0] = PPAT(-1);
+			r_buf[0] = PAT(1);
+			if (strcoll(l_buf, ch_buf) <= 0 &&
+			    strcoll(ch_buf, r_buf) <= 0)
+#else
 			if (PPAT(-1) <= ch && PAT(1) >= ch)
+#endif
 			    break;
 		    } else if (ch == PAT(0))
 			break;
--- Src/subst.c.org	Fri Jun 28 18:46:24 1996
+++ Src/subst.c	Wed Jul 10 13:01:47 1996
@@ -430,36 +430,80 @@
 int
 strpcmp(const void *a, const void *b)
 {
+#ifdef HAVE_STRCOLL
+    return strcoll(*(char **)a, *(char **)b);
+#else
     return strcmp(*(char **)a, *(char **)b);
+#endif
 }
 
 /**/
 int
 invstrpcmp(const void *a, const void *b)
 {
+#ifdef HAVE_STRCOLL
+    return -strcoll(*(char **)a, *(char **)b);
+#else
     return -strcmp(*(char **)a, *(char **)b);
+#endif
 }
 
 /**/
 int
 cstrpcmp(const void *a, const void *b)
 {
+#ifdef HAVE_STRCOLL
+    char *c = dupstring(*(char **)a), *d = dupstring(*(char **)b);
+    char *cc = c, *dd = d;
+    int   cmp;
+
+    while (*cc++) cc[-1] = tulower(cc[-1]);
+    while (*dd++) dd[-1] = tulower(dd[-1]);
+
+    cmp = strcoll(c, d);
+
+    if (!useheap) {
+	free(c);
+	free(d);
+    }
+
+    return cmp;
+#else
     char *c = *(char **)a, *d = *(char **)b;
 
     for (; *c && tulower(*c) == tulower(*d); c++, d++);
 
     return (int)STOUC(tulower(*c)) - (int)STOUC(tulower(*d));
+#endif
 }
 
 /**/
 int
 invcstrpcmp(const void *a, const void *b)
 {
+#ifdef HAVE_STRCOLL
+    char *c = dupstring(*(char **)a), *d = dupstring(*(char **)b);
+    char *cc = c, *dd = d;
+    int   cmp;
+
+    while (*cc++) cc[-1] = tulower(cc[-1]);
+    while (*dd++) dd[-1] = tulower(dd[-1]);
+
+    cmp = strcoll(c, d);
+
+    if (!useheap) {
+	free(c);
+	free(d);
+    }
+
+    return -cmp;
+#else
     char *c = *(char **)a, *d = *(char **)b;
 
     for (; *c && tulower(*c) == tulower(*d); c++, d++);
 
     return (int)STOUC(tulower(*d)) - (int)STOUC(tulower(*c));
+#endif
 }
 
 /**/
--- configure.in.org	Fri Jul  5 00:06:46 1996
+++ configure.in	Tue Jul  9 14:25:34 1996
@@ -409,7 +409,7 @@
 AC_CHECK_FUNCS(strftime waitpid select tcsetpgrp tcgetattr strstr lstat \
               getlogin setpgid gettimeofday gethostname mkfifo wait3 difftime  \
               sigblock sigsetmask sigrelse sighold killpg sigaction getrlimit  \
-              sigprocmask setuid seteuid setreuid setresuid strerror)
+              sigprocmask setuid seteuid setreuid setresuid strerror strcoll)
 
 dnl -------------
 dnl CHECK SIGNALS





Messages sorted by: Reverse Date, Date, Thread, Author