Zsh Mailing List Archive
Messages sorted by:
Reverse Date,
Date,
Thread,
Author
Re: Incorrect sorting of Polish characters
On Mon, 18 Jul 2016 10:33:29 +0100
Peter Stephenson <p.stephenson@xxxxxxxxxxx> wrote:
> On Sat, 16 Jul 2016 13:07:18 -0700
> Bart Schaefer <schaefer@xxxxxxxxxxxxxxxx> wrote:
> > On Jul 16, 7:17pm, M. Bartoszkiewicz wrote:
> > } I have noticed that some Polish characters
> > } are sorted incorrectly in glob expansion (but
> > } correctly in other contexts).
>
> A simple-minded change to pass strcoll() unmetafied versions of the
> strings does seem to fix the problem, so it looks like this is the
> case. However, that's not the right fix as we only want to unmetafy
> once per input string, not once per comparison, and below the call to
> qsort() there's quite a lot of internal string handling. An equally
> simple-minded fix around the call to qsort() (saving and restoring the
> strings) didn't seem to work. So this needs a bit more thought.
Adding an unmetafied entry to the glob match that only gets used for
sorting seems to do the trick. I think an additional single pass
through the array of matches isn't a big deal. Possibly the sort code
needs a check through to confirm it really is unmeta-friendly for
globbing as there are different ways in. Any other suggestions?
pws
diff --git a/Src/glob.c b/Src/glob.c
index 2051016..146b4db 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -41,7 +41,10 @@
typedef struct gmatch *Gmatch;
struct gmatch {
+ /* Metafied file name */
char *name;
+ /* Unmetafied file name; embedded nulls can't occur in file names */
+ char *uname;
/*
* Array of sort strings: one for each GS_EXEC sort type in
* the glob qualifiers.
@@ -911,7 +914,8 @@ gmatchcmp(Gmatch a, Gmatch b)
for (i = gf_nsorts, s = gf_sortlist; i; i--, s++) {
switch (s->tp & ~GS_DESC) {
case GS_NAME:
- r = zstrcmp(b->name, a->name, gf_numsort ? SORTIT_NUMERICALLY : 0);
+ r = zstrcmp(b->uname, a->uname,
+ gf_numsort ? SORTIT_NUMERICALLY : 0);
break;
case GS_DEPTH:
{
@@ -1859,6 +1863,7 @@ zglob(LinkList list, LinkNode np, int nountok)
int nexecs = 0;
struct globsort *sortp;
struct globsort *lastsortp = gf_sortlist + gf_nsorts;
+ Gmatch gmptr;
/* First find out if there are any GS_EXECs, counting them. */
for (sortp = gf_sortlist; sortp < lastsortp; sortp++)
@@ -1910,6 +1915,29 @@ zglob(LinkList list, LinkNode np, int nountok)
}
}
+ /*
+ * Where necessary, create unmetafied version of names
+ * for comparison. If no Meta characters just point
+ * to original string. All on heap.
+ */
+ for (gmptr = matchbuf; gmptr < matchptr; gmptr++)
+ {
+ char *nptr;
+ for (nptr = gmptr->name; *nptr; nptr++)
+ {
+ if (*nptr == Meta)
+ break;
+ }
+ if (*nptr == Meta)
+ {
+ int dummy;
+ gmptr->uname = dupstring(gmptr->name);
+ unmetafy(gmptr->uname, &dummy);
+ } else {
+ gmptr->uname = gmptr->name;
+ }
+ }
+
/* Sort arguments in to lexical (and possibly numeric) order. *
* This is reversed to facilitate insertion into the list. */
qsort((void *) & matchbuf[0], matchct, sizeof(struct gmatch),
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index dedf241..1b1d042 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -562,3 +562,20 @@
}
: $functions)
0:Multibtye handled of functions parameter
+
+ if [[ -n ${$(locale -a 2>/dev/null)[(R)pl_PL.utf8]} ]]; then
+ (
+ export LC_ALL=pl_PL.UTF-8
+ local -a names=(a b c d e f $'\u0105' $'\u0107' $'\u0119')
+ print -o $names
+ mkdir -p plchars
+ cd plchars
+ touch $names
+ print ?
+ )
+ else
+ ZTST_skip="No Polish UTF-8 local found, skipping sort test"
+ fi
+0:Sorting of metafied Polish characters
+>a ą b c ć d e ę f
+>a ą b c ć d e ę f
Messages sorted by:
Reverse Date,
Date,
Thread,
Author