Zsh Mailing List Archive
Messages sorted by:
Reverse Date,
Date,
Thread,
Author
Re: PATCH: Re: Broken completion with UTF-8 description
- X-seq: zsh-workers 22761
- From: Andrey Borzenkov <arvidjaar@xxxxxxxxxx>
- To: zsh-workers@xxxxxxxxxx
- Subject: Re: PATCH: Re: Broken completion with UTF-8 description
- Date: Sat, 23 Sep 2006 11:05:30 +0400
- In-reply-to: <20060921210251.8d84fdb1.p.w.stephenson@xxxxxxxxxxxx>
- Mailing-list: contact zsh-workers-help@xxxxxxxxxx; run by ezmlm
- References: <200609171853.57050.arvidjaar@xxxxxxxxxx> <200609212104.23198.arvidjaar@xxxxxxxxxx> <20060921210251.8d84fdb1.p.w.stephenson@xxxxxxxxxxxx>
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
On Friday 22 September 2006 00:02, Peter Stephenson wrote:
>
> Looking in more detail, cd_sort is passed as an argument to qsort(), so
> it may be called many times while sorting the arguments. It's therefore
> probably better to do the conversion in the caller, of which there's
> only one at line 270, i.e. the str elements of the values in the array
> grps should be copied to an array of unmetafied strings, and then simply
> call strpcmp() in cd_sort. In fact, since you're doing a copy anyway
> you could probably convert the array to a format where you can pass
> strpcmp() as the final argument and eliminate cd_sort() altogether.
>
> That ought to improve your speed problem quite a bit.
Actually it became slow even before I added unmetafy; this is likely due to
O(n**2) string width calculations done when copying description.
Anyway, you are right of course. The modified patch does it, and also relaxes
string width calculation - it now assumes that width of string is sum of
widths of constituting multibyte characters. If this turns out to be wrong,
we have to put up with slow down ...
I will commit it in this form if there are now objections.
- -andrey
Index: Src/subst.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/subst.c,v
retrieving revision 1.61
diff -u -p -r1.61 subst.c
- --- Src/subst.c 20 Sep 2006 09:22:34 -0000 1.61
+++ Src/subst.c 23 Sep 2006 06:59:19 -0000
@@ -581,7 +581,7 @@ strcatsub(char **d, char *pb, char *pe,
typedef int (*CompareFn) _((const void *, const void *));
/**/
- -int
+mod_export int
strpcmp(const void *a, const void *b)
{
#ifdef HAVE_STRCOLL
Index: Src/Zle/computil.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/computil.c,v
retrieving revision 1.95
diff -u -p -r1.95 computil.c
- --- Src/Zle/computil.c 17 Sep 2006 19:23:39 -0000 1.95
+++ Src/Zle/computil.c 23 Sep 2006 06:59:19 -0000
@@ -33,6 +33,13 @@
/* Help for `_describe'. */
+/*
+ * FIXME this should be defined globally. I have to find other places
+ * where it is used
+ * */
+
+#define INTERMATCH_GAP 2
+
typedef struct cdset *Cdset;
typedef struct cdstr *Cdstr;
typedef struct cdrun *Cdrun;
@@ -40,16 +47,18 @@ typedef struct cdrun *Cdrun;
struct cdstate {
int showd; /* != 0 if descriptions should be shown */
char *sep; /* the separator string */
- - int slen; /* its length */
+ int slen; /* its metafied length */
+ int swidth; /* its screen width */
int maxmlen; /* maximum length to allow for the matches */
Cdset sets; /* the sets of matches */
- - int pre; /* longest prefix (before description) */
+ int pre; /* longest prefix length (before description)
*/
+ int premaxw; /* ... and its screen width */
int suf; /* longest suffix (description) */
int maxg; /* size of largest group */
int maxglen; /* columns for matches of largest group */
int groups; /* number of groups */
int descs; /* number of non-group matches with desc */
- - int gpre; /* prefix length for group display */
+ int gprew; /* prefix screen width for group display */
Cdrun runs; /* runs to report to shell code */
};
@@ -58,7 +67,9 @@ struct cdstr {
char *str; /* the string to display */
char *desc; /* the description or NULL */
char *match; /* the match to add */
+ char *sortstr; /* unmetafied string used to sort matches */
int len; /* length of str or match */
+ int width; /* ... and its screen width */
Cdstr other; /* next string with the same description */
int kind; /* 0: not in a group, 1: the first, 2: other
*/
Cdset set; /* the set this string is in */
@@ -102,6 +113,7 @@ freecdsets(Cdset p)
freearray(p->opts);
for (s = p->strs; s; s = sn) {
sn = s->next;
+ zfree(s->sortstr, strlen(s->str) + 1);
zsfree(s->str);
zsfree(s->desc);
if (s->match != s->str)
@@ -123,7 +135,7 @@ cd_group(int maxg)
{
Cdset set1, set2;
Cdstr str1, str2, *strp;
- - int num, len;
+ int num, width;
cd_state.groups = cd_state.descs = cd_state.maxglen = 0;
cd_state.maxg = 0;
@@ -140,20 +152,20 @@ cd_group(int maxg)
continue;
num = 1;
- - len = str1->len + cd_state.slen;
- - if (len > cd_state.maxglen)
- - cd_state.maxglen = len;
+ width = str1->width + cd_state.swidth;
+ if (width > cd_state.maxglen)
+ cd_state.maxglen = width;
strp = &(str1->other);
for (set2 = set1; set2; set2 = set2->next) {
for (str2 = (set2 == set1 ? str1->next : set2->strs);
str2; str2 = str2->next)
if (str2->desc && !strcmp(str1->desc, str2->desc)) {
- - len += 2 + str2->len;
- - if (len > cd_state.maxmlen || num == maxg)
+ width += INTERMATCH_GAP + str2->width;
+ if (width > cd_state.maxmlen || num == maxg)
break;
- - if (len > cd_state.maxglen)
- - cd_state.maxglen = len;
+ if (width > cd_state.maxglen)
+ cd_state.maxglen = width;
str1->kind = 1;
str2->kind = 2;
num++;
@@ -194,6 +206,8 @@ cd_calc()
set->count++;
if ((l = strlen(str->str)) > cd_state.pre)
cd_state.pre = l;
+ if ((l = MB_METASTRWIDTH(str->str)) > cd_state.premaxw)
+ cd_state.premaxw = l;
if (str->desc) {
set->desc++;
if ((l = strlen(str->desc)) > cd_state.suf)
@@ -206,7 +220,7 @@ cd_calc()
static int
cd_sort(const void *a, const void *b)
{
- - return strcmp((*((Cdstr *) a))->str, (*((Cdstr *) b))->str);
+ return strpcmp(&(*((Cdstr *) a))->sortstr, &(*((Cdstr *) b))->sortstr);
}
static int
@@ -234,8 +248,8 @@ cd_prep()
for (str = set->strs; str; str = str->next) {
if (str->kind != 1) {
if (!str->kind && str->desc) {
- - if (str->len > wids[0])
- - wids[0] = str->len;
+ if (str->width > wids[0])
+ wids[0] = str->width;
str->other = NULL;
*strp++ = str;
}
@@ -248,25 +262,34 @@ cd_prep()
for (; gp; gp = gn) {
gn = gp->other;
gp->other = NULL;
- - for (gpp = &gs; *gpp && (*gpp)->len > gp->len;
+ for (gpp = &gs; *gpp && (*gpp)->width > gp->width;
gpp = &((*gpp)->other));
gp->other = *gpp;
*gpp = gp;
}
for (gp = gs, i = 0; gp; gp = gp->other, i++)
- - if (gp->len > wids[i])
- - wids[i] = gp->len;
+ if (gp->width > wids[i])
+ wids[i] = gp->width;
*strp++ = gs;
}
- - cd_state.gpre = 0;
- - for (i = 0; i < cd_state.maxg; i++)
- - cd_state.gpre += wids[i] + 2;
+ cd_state.gprew = 0;
+ for (i = 0; i < cd_state.maxg; i++) {
+ cd_state.gprew += wids[i] + INTERMATCH_GAP;
+ }
- - if (cd_state.gpre > cd_state.maxmlen && cd_state.maxglen > 1)
+ if (cd_state.gprew > cd_state.maxmlen && cd_state.maxglen > 1)
return 1;
+ for (i = 0; i < lines; i++) {
+ Cdstr s = grps[i];
+ int dummy;
+
+ s->sortstr = ztrdup(s->str);
+ unmetafy(s->sortstr, &dummy);
+ }
+
qsort(grps, lines, sizeof(Cdstr), cd_sort);
for (i = lines, strp = grps; i > 1; i--, strp++) {
@@ -443,12 +466,13 @@ cd_init(char *nam, char *hide, char *mle
}
setp = &(cd_state.sets);
cd_state.sep = ztrdup(sep);
- - cd_state.slen = ztrlen(sep);
+ cd_state.slen = strlen(sep);
+ cd_state.swidth = MB_METASTRWIDTH(sep);
cd_state.sets = NULL;
cd_state.showd = disp;
cd_state.maxg = cd_state.groups = cd_state.descs = 0;
cd_state.maxmlen = atoi(mlen);
- - itmp = columns - cd_state.slen - 4;
+ itmp = columns - cd_state.swidth - 4;
if (cd_state.maxmlen > itmp)
cd_state.maxmlen = itmp;
if (cd_state.maxmlen < 4)
@@ -489,6 +513,8 @@ cd_init(char *nam, char *hide, char *mle
*tmp = '\0';
str->str = str->match = ztrdup(rembslash(*ap));
str->len = strlen(str->str);
+ str->width = MB_METASTRWIDTH(str->str);
+ str->sortstr = NULL;
}
if (str)
str->next = NULL;
@@ -600,38 +626,61 @@ cd_get(char **params)
case CRT_DESC:
{
+ /*
+ * The buffer size:
+ * max prefix length (cd_state.pre) +
+ * max padding (cd_state.premaxw generously :) +
+ * separator length (cd_state.slen) +
+ * inter matches gap (INTERMATCH_GAP) +
+ * max description length (cd_state.suf) +
+ * trailing \0
+ */
VARARR(char, buf,
- - cd_state.pre + cd_state.suf + cd_state.slen + 3);
- - char *sufp = NULL;
- -
- - memcpy(buf + cd_state.pre + 2, cd_state.sep, cd_state.slen);
- - buf[cd_state.pre] = buf[cd_state.pre + 1] = ' ';
- - sufp = buf + cd_state.pre + cd_state.slen + 2;
- -
+ cd_state.pre + cd_state.suf +
+ cd_state.premaxw + cd_state.slen + 3);
mats = mp = (char **) zalloc((run->count + 1) * sizeof(char
*));
dpys = dp = (char **) zalloc((run->count + 1) * sizeof(char
*));
for (str = run->strs; str; str = str->run) {
+ char *p = buf, *pp, *d;
+ int l, remw, w;
+
*mp++ = ztrdup(str->match);
- - memset(buf, ' ', cd_state.pre);
- - memcpy(buf, str->str, str->len);
- - strcpy(sufp, str->desc);
- - if (MB_METASTRWIDTH(buf) >= columns - 1) {
- - char *termptr = buf;
- - int w;
- - MB_METACHARINIT();
- - for (w = columns - 1; *termptr && w > 0; ) {
- - convchar_t cchar;
- - int cw;
- - termptr += MB_METACHARLENCONV(termptr, &cchar);
- - cw = WCWIDTH(cchar);
- - if (cw >= 0)
- - w -= cw;
- - else
- - w--;
+ strcpy(p, str->str);
+ p += str->len;
+ memset(p, ' ', (l = (cd_state.premaxw - str->width +
INTERMATCH_GAP)));
+ p += l;
+ strcpy(p, cd_state.sep);
+ p += cd_state.slen;
+
+ /*
+ * copy a character at once until no more screen width
+ * is available. Leave 1 character at the end of screen
+ * as safety margin
+ */
+ remw = columns - cd_state.premaxw - cd_state.swidth - 3;
+ d = str->desc;
+ w = MB_METASTRWIDTH(d);
+ if (w <= remw)
+ strcpy(p, d);
+ else {
+ pp = p;
+ while (remw > 0 && *d) {
+ l = MB_METACHARLEN(d);
+ memcpy(pp, d, l);
+ pp[l] = '\0';
+ w = MB_METASTRWIDTH(pp);
+ if (w > remw) {
+ *pp = '\0';
+ break;
+ }
+
+ pp += l;
+ d += l;
+ remw -= w;
}
- - *termptr = '\0';
}
+
*dp++ = ztrdup(buf);
}
*mp = *dp = NULL;
@@ -684,10 +733,10 @@ cd_get(char **params)
default: /* This silences the "might be used uninitialized" warnings */
case CRT_EXPL:
{
- - int dlen = columns - cd_state.gpre - cd_state.slen;
- - VARARR(char, dbuf, dlen + cd_state.slen);
- - char buf[20];
- - int i = run->count;
+ /* add columns as safety margin */
+ VARARR(char, dbuf, cd_state.suf + cd_state.slen + columns);
+ char buf[20], *p, *pp, *d;
+ int i = run->count, remw, w, l;
sprintf(buf, "-E%d", i);
@@ -699,13 +748,36 @@ cd_get(char **params)
*dp++ = ztrdup("");
continue;
}
- - memset(dbuf + cd_state.slen, ' ', dlen - 1);
- - dbuf[dlen + cd_state.slen - 1] = '\0';
+
strcpy(dbuf, cd_state.sep);
- - memcpy(dbuf + cd_state.slen,
- - str->desc,
- - ((int)strlen(str->desc) >= dlen ? dlen - 1 :
- - (int)strlen(str->desc)));
+ remw = columns - cd_state.gprew - cd_state.swidth - INTERMATCH_GAP;
+ p = pp = dbuf + cd_state.slen;
+ d = str->desc;
+ w = MB_METASTRWIDTH(d);
+ if (w <= remw) {
+ strcpy(p, d);
+ remw -= w;
+ pp += strlen(d);
+ } else
+ while (remw > 0 && *d) {
+ l = MB_METACHARLEN(d);
+ memcpy(pp, d, l);
+ pp[l] = '\0';
+ w = MB_METASTRWIDTH(pp);
+ if (w > remw) {
+ *pp = '\0';
+ break;
+ }
+
+ pp += l;
+ d += l;
+ remw -= w;
+ }
+
+ while (remw-- > 0)
+ *pp++ = ' ';
+ *pp = '\0';
+
*dp++ = ztrdup(dbuf);
}
mats[0] = *dp = NULL;
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.5 (GNU/Linux)
iD8DBQFFFNy8R6LMutpd94wRAtuXAKCdagFuUTvkXWYTHWo1whRcmwbwhQCfQv8c
pSa0X+lYrgYPkPV3GwF689w=
=78F3
-----END PGP SIGNATURE-----
Messages sorted by:
Reverse Date,
Date,
Thread,
Author