Zsh Mailing List Archive
Messages sorted by:
Reverse Date,
Date,
Thread,
Author
PATCH: sort ordering by function
- X-seq: zsh-workers 26448
- From: Peter Stephenson <pws@xxxxxxx>
- To: zsh-workers@xxxxxxxxxx (Zsh hackers list)
- Subject: PATCH: sort ordering by function
- Date: Mon, 26 Jan 2009 16:34:59 +0000
- Mailing-list: contact zsh-workers-help@xxxxxxxxxx; run by ezmlm
I wrote this because I needed it.
I am faced with a set of directories named <day>-<month>-<year>,
e.g. 26-01-08. The modification times on the directories aren't useful.
I want these in time order.
After the patch, the solution (assuming EXTENDED_GLOB is on and
KSH_ARRAYS isn't; this isn't supposed to be portable, it's just a
trivial example) is:
  sd() {
    local -a match mbegin mend
    [[ $REPLY = (#b)(*)-(*)-(*) ]] && REPLY="$match[3]-$match[2]-$match[1]"
  }
  print -l *(o+sd)
Comments welcome.
Test code should appear when I have time, though if anyone has time to
write test code for this or anything else, that would be fantastic.
Index: Doc/Zsh/expn.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/expn.yo,v
retrieving revision 1.100
diff -u -r1.100 expn.yo
--- Doc/Zsh/expn.yo 17 Nov 2008 16:56:42 -0000 1.100
+++ Doc/Zsh/expn.yo 26 Jan 2009 16:20:05 -0000
@@ -2229,7 +2229,7 @@
item(tt(o)var(c))(
specifies how the names of the files should be sorted. If var(c) is
tt(n) they are sorted by name (the default); if it is tt(L) they
-are sorted depending on the size (length) of the files; if tt(l)
+are sorted depending on the size (length) of the files; if tt(l)
they are sorted by the number of links; if tt(a), tt(m), or tt(c)
they are sorted by the time of the last access, modification, or
inode change respectively; if tt(d), files in subdirectories appear before
@@ -2242,6 +2242,16 @@
so `tt(*(^-oL))' gives a list of all files sorted by file size in descending
order, following any symbolic links. Unless tt(oN) is used, multiple order
specifiers may occur to resolve ties.
+
+tt(oe) and tt(o+) are special cases; they are each followed by shell code,
+delimited as for the tt(e) glob qualifier and the tt(+) glob qualifier
+respectively (see above). The code is executed for each matched file with
+the parameter tt(REPLY) set to the name of the file on entry. The code
+should modify the parameter tt(REPLY) in some fashion. On return, the value
+of the parameter is used instead of the file name as the string on which to
+sort. Unlike other sort operators, tt(oe) and tt(o+) may be repeated, but
+note that the maximum number of sort operators of any kind that may appear
+in any glob expression is 12.
)
item(tt(O)var(c))(
like `tt(o)', but sorts in descending order; i.e. `tt(*(^oc))' is the
Index: Src/glob.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/glob.c,v
retrieving revision 1.68
diff -u -r1.68 glob.c
--- Src/glob.c 8 Nov 2008 06:31:02 -0000 1.68
+++ Src/glob.c 26 Jan 2009 16:20:06 -0000
@@ -42,6 +42,11 @@
struct gmatch {
char *name;
+ /*
+ * Array of sort strings: one for each GS_EXEC sort type in
+ * the glob qualifiers.
+ */
+ char **sortstrs;
off_t size ALIGN64;
long atime;
long mtime;
@@ -68,8 +73,9 @@
#define GS_NAME 1
#define GS_DEPTH 2
+#define GS_EXEC 4
-#define GS_SHIFT_BASE 4
+#define GS_SHIFT_BASE 8
#define GS_SIZE (GS_SHIFT_BASE)
#define GS_ATIME (GS_SHIFT_BASE << 1)
@@ -135,6 +141,17 @@
/**/
mod_export char *glob_pre, *glob_suf;
+/* Element of a glob sort */
+struct globsort {
+ /* Sort type */
+ int tp;
+ /* Sort code to eval, if type is GS_EXEC */
+ char *exec;
+};
+
+/* Maximum entries in sort array */
+#define MAX_SORTS (12)
+
/* struct to easily save/restore current state */
struct globdata {
@@ -157,7 +174,8 @@
int gd_range, gd_amc, gd_units;
int gd_gf_nullglob, gd_gf_markdirs, gd_gf_noglobdots, gd_gf_listtypes;
int gd_gf_numsort;
- int gd_gf_follow, gd_gf_sorts, gd_gf_nsorts, gd_gf_sortlist[11];
+ int gd_gf_follow, gd_gf_sorts, gd_gf_nsorts;
+ struct globsort gd_gf_sortlist[MAX_SORTS];
char *gd_glob_pre, *gd_glob_suf;
};
@@ -880,11 +898,13 @@
static int
gmatchcmp(Gmatch a, Gmatch b)
{
- int i, *s;
+ int i;
off_t r = 0L;
+ struct globsort *s;
+ char **asortstrp = NULL, **bsortstrp = NULL;
for (i = gf_nsorts, s = gf_sortlist; i; i--, s++) {
- switch (*s & ~GS_DESC) {
+ switch (s->tp & ~GS_DESC) {
case GS_NAME:
r = zstrcmp(b->name, a->name, gf_numsort ? SORTIT_NUMERICALLY : 0);
break;
@@ -910,6 +930,17 @@
r = slasha - slashb;
}
break;
+ case GS_EXEC:
+ if (!asortstrp) {
+ asortstrp = a->sortstrs;
+ bsortstrp = b->sortstrs;
+ } else {
+ asortstrp++;
+ bsortstrp++;
+ }
+ r = zstrcmp(*bsortstrp, *asortstrp,
+ gf_numsort ? SORTIT_NUMERICALLY : 0);
+ break;
case GS_SIZE:
r = b->size - a->size;
break;
@@ -966,7 +997,7 @@
break;
}
if (r)
- return (int) ((*s & GS_DESC) ? -r : r);
+ return (int) ((s->tp & GS_DESC) ? -r : r);
}
return 0;
}
@@ -1000,6 +1031,49 @@
return qfirst;
}
+
+/*
+ * Get a glob string for execution, following e or + qualifiers.
+ * Pointer is character after the e or +.
+ */
+
+/**/
+static char *
+glob_exec_string(char **sp)
+{
+ char sav, *tt, *sdata, *s = *sp;
+ int plus;
+
+ if (s[-1] == '+') {
+ plus = 0;
+ tt = itype_end(s, IIDENT, 0);
+ if (tt == s)
+ {
+ zerr("missing identifier after `+'");
+ return NULL;
+ }
+ } else {
+ tt = get_strarg(s, &plus);
+ if (!*tt)
+ {
+ zerr("missing end of string");
+ return NULL;
+ }
+ }
+
+ sav = *tt;
+ *tt = '\0';
+ sdata = dupstring(s + plus);
+ untokenize(sdata);
+ *tt = sav;
+ if (sav)
+ *sp = tt + plus;
+ else
+ *sp = tt;
+
+ return sdata;
+}
+
/* Main entry point to the globbing code for filename globbing. *
* np points to a node in the list list which will be expanded *
* into a series of nodes. */
@@ -1449,7 +1523,16 @@
case 'O':
{
int t;
+ char *send;
+ if (gf_nsorts == MAX_SORTS) {
+ zerr("too many glob sort specifiers");
+ restore_globstate(saved);
+ return;
+ }
+
+ /* usually just one character */
+ send = s+1;
switch (*s) {
case 'n': t = GS_NAME; break;
case 'L': t = GS_SIZE; break;
@@ -1459,60 +1542,50 @@
case 'c': t = GS_CTIME; break;
case 'd': t = GS_DEPTH; break;
case 'N': t = GS_NONE; break;
+ case 'e':
+ case '+':
+ {
+ t = GS_EXEC;
+ if ((gf_sortlist[gf_nsorts].exec =
+ glob_exec_string(&send)) == NULL)
+ {
+ restore_globstate(saved);
+ return;
+ }
+ break;
+ }
default:
zerr("unknown sort specifier");
restore_globstate(saved);
return;
}
- if ((sense & 2) && !(t & (GS_NAME|GS_DEPTH)))
- t <<= GS_SHIFT;
- if (gf_sorts & t) {
- zerr("doubled sort specifier");
- restore_globstate(saved);
- return;
+ if (t != GS_EXEC) {
+ if ((sense & 2) && !(t & (GS_NAME|GS_DEPTH)))
+ t <<= GS_SHIFT; /* HERE: GS_EXEC? */
+ if (gf_sorts & t) {
+ zerr("doubled sort specifier");
+ restore_globstate(saved);
+ return;
+ }
}
gf_sorts |= t;
- gf_sortlist[gf_nsorts++] = t |
+ gf_sortlist[gf_nsorts++].tp = t |
(((sense & 1) ^ (s[-1] == 'O')) ? GS_DESC : 0);
- s++;
+ s = send;
break;
}
case '+':
case 'e':
{
- char sav, *tt;
- int plus;
+ char *tt;
- if (s[-1] == '+') {
- plus = 0;
- tt = itype_end(s, IIDENT, 0);
- if (tt == s)
- {
- zerr("missing identifier after `+'");
- tt = NULL;
- }
- } else {
- tt = get_strarg(s, &plus);
- if (!*tt)
- {
- zerr("missing end of string");
- tt = NULL;
- }
- }
+ tt = glob_exec_string(&s);
if (tt == NULL) {
data = 0;
} else {
- sav = *tt;
- *tt = '\0';
func = qualsheval;
- sdata = dupstring(s + plus);
- untokenize(sdata);
- *tt = sav;
- if (sav)
- s = tt + plus;
- else
- s = tt;
+ sdata = tt;
}
break;
}
@@ -1632,7 +1705,7 @@
return;
}
if (!gf_nsorts) {
- gf_sortlist[0] = gf_sorts = GS_NAME;
+ gf_sortlist[0].tp = gf_sorts = GS_NAME;
gf_nsorts = 1;
}
/* Initialise receptacle for matched files, *
@@ -1665,7 +1738,65 @@
}
}
- if (!(gf_sortlist[0] & GS_NONE)) {
+ if (!(gf_sortlist[0].tp & GS_NONE)) {
+ /*
+ * Get the strings to use for sorting by executing
+ * the code chunk. We allow more than one of these.
+ */
+ int nexecs = 0;
+ struct globsort *sortp;
+ struct globsort *lastsortp = gf_sortlist + gf_nsorts;
+
+ /* First find out if there are any GS_EXECs, counting them. */
+ for (sortp = gf_sortlist; sortp < lastsortp; sortp++)
+ {
+ if (sortp->tp & GS_EXEC)
+ nexecs++;
+ }
+
+ if (nexecs) {
+ Gmatch tmpptr;
+ int iexec = 0;
+
+ /* Yes; allocate enough space for strings for each */
+ for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++)
+ tmpptr->sortstrs = (char **)zhalloc(nexecs*sizeof(char*));
+
+ /* Loop over each one, incrementing iexec */
+ for (sortp = gf_sortlist; sortp < lastsortp; sortp++)
+ {
+ /* Ignore unless this is a GS_EXEC */
+ if (sortp->tp & GS_EXEC) {
+ Eprog prog;
+
+ if ((prog = parse_string(sortp->exec, 0))) {
+ int ef = errflag, lv = lastval, ret;
+
+ /* Parsed OK, execute for each name */
+ for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++) {
+ setsparam("REPLY", ztrdup(tmpptr->name));
+ execode(prog, 1, 0);
+ if (!errflag)
+ tmpptr->sortstrs[iexec] =
+ dupstring(getsparam("REPLY"));
+ else
+ tmpptr->sortstrs[iexec] = tmpptr->name;
+ }
+
+ ret = lastval;
+ errflag = ef;
+ lastval = lv;
+ } else {
+ /* Failed, let's be safe */
+ for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++)
+ tmpptr->sortstrs[iexec] = tmpptr->name;
+ }
+
+ iexec++;
+ }
+ }
+ }
+
/* Sort arguments in to lexical (and possibly numeric) order. *
* This is reversed to facilitate insertion into the list. */
qsort((void *) & matchbuf[0], matchct, sizeof(struct gmatch),
@@ -1682,7 +1813,7 @@
else if (end > matchct)
end = matchct;
if ((end -= first) > 0) {
- if (gf_sortlist[0] & GS_NONE) {
+ if (gf_sortlist[0].tp & GS_NONE) {
/* Match list was never reversed, so insert back to front. */
matchptr = matchbuf + matchct - first - 1;
while (end-- > 0) {
--
Peter Stephenson <pws@xxxxxxx> Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK Tel: +44 (0)1223 692070
Messages sorted by:
Reverse Date,
Date,
Thread,
Author