Zsh Mailing List Archive
Messages sorted by:
Reverse Date,
Date,
Thread,
Author
Re: PATCH: Apply spell correction to autocd
- X-seq: zsh-workers 20892
- From: Bart Schaefer <schaefer@xxxxxxxxxxxxxxxx>
- To: zsh-workers@xxxxxxxxxx
- Subject: Re: PATCH: Apply spell correction to autocd
- Date: Mon, 28 Feb 2005 18:14:15 +0000
- In-reply-to: <200502281044.j1SAiJPP005690@xxxxxxxxxxxxxx>
- Mailing-list: contact zsh-workers-help@xxxxxxxxxx; run by ezmlm
- References: <1050227204407.ZM19297@xxxxxxxxxxxxxxxxxxxxxxx> <1050228065407.ZM20816@xxxxxxxxxxxxxxxxxxxxxxx> <200502281044.j1SAiJPP005690@xxxxxxxxxxxxxx>
On Feb 28, 10:44am, Peter Stephenson wrote:
}
} I don't think the internal spellchecking stuff has ever had a major
} overhaul (as distinct from having extra bits grafted on). It's not
} surprising if it's weird. I expect tidying it up would be a good idea.
OK, here's a stab at it. See the embedded comments (gasp). Apply this patch
instead of the previous one (20882), not on top of it.
Index: Src/utils.c
===================================================================
RCS file: /extra/cvsroot/zsh/zsh-4.0/Src/utils.c,v
retrieving revision 1.21
diff -c -r1.21 utils.c
--- Src/utils.c 18 Feb 2005 17:05:17 -0000 1.21
+++ Src/utils.c 28 Feb 2005 18:06:44 -0000
@@ -1647,11 +1659,12 @@
mod_export void
spckword(char **s, int hist, int cmd, int ask)
{
- char *t, *u;
+ char *t;
int x;
char ic = '\0';
int ne;
int preflen = 0;
+ int autocd = cmd && isset(AUTOCD) && strcmp(*s, ".") && strcmp(*s, "..");
if ((histdone & HISTFLAG_NOEXEC) || **s == '-' || **s == '%')
return;
@@ -1715,8 +1728,7 @@
}
if (access(unmeta(guess), F_OK) == 0)
return;
- if ((u = spname(guess)) != guess)
- best = u;
+ best = spname(guess);
if (!*t && cmd) {
if (hashcmd(guess, pathchecked))
return;
@@ -1726,12 +1738,28 @@
scanhashtable(shfunctab, 1, 0, 0, spscan, 0);
scanhashtable(builtintab, 1, 0, 0, spscan, 0);
scanhashtable(cmdnamtab, 1, 0, 0, spscan, 0);
+ if (autocd) {
+ char **pp;
+ for (pp = cdpath; *pp; pp++) {
+ char bestcd[PATH_MAX + 1];
+ int thisdist;
+ /* Less than d here, instead of less than or equal *
+ * as used in spscan(), so that an autocd is chosen *
+ * only when it is better than anything so far, and *
+ * so we prefer directories earlier in the cdpath. */
+ if ((thisdist = mindist(*pp, *s, bestcd)) < d) {
+ best = dupstring(bestcd);
+ d = thisdist;
+ }
+ }
+ }
}
}
if (errflag)
return;
if (best && (int)strlen(best) > 1 && strcmp(best, guess)) {
if (ic) {
+ char *u;
if (preflen) {
/* do not correct the result of an expansion */
if (strncmp(guess, best, preflen))
@@ -2421,10 +2449,14 @@
{
char *p, spnameguess[PATH_MAX + 1], spnamebest[PATH_MAX + 1];
static char newname[PATH_MAX + 1];
- char *new = newname, *old;
- int bestdist = 200, thisdist;
+ char *new = newname, *old = oldname;
+ int bestdist = 0, thisdist, thresh, maxthresh = 0;
- old = oldname;
+ /* This loop corrects each directory component of the path, stopping *
+ * when any correction distance would exceed the distance threshold. *
+ * NULL is returned only if the first component cannot be corrected; *
+ * otherwise a copy of oldname with a corrected prefix is returned. *
+ * Rationale for this, if there ever was any, has been forgotten. */
for (;;) {
while (*old == '/')
*new++ = *old++;
@@ -2436,15 +2468,29 @@
if (p < spnameguess + PATH_MAX)
*p++ = *old;
*p = '\0';
- if ((thisdist = mindist(newname, spnameguess, spnamebest)) >= 3) {
- if (bestdist < 3) {
+ /* Every component is allowed a single distance 2 correction or two *
+ * distance 1 corrections. Longer ones get additional corrections. */
+ thresh = (int)(p - spnameguess) / 4 + 1;
+ if (thresh < 3)
+ thresh = 3;
+ if ((thisdist = mindist(newname, spnameguess, spnamebest)) >= thresh) {
+ /* The next test is always true, except for the first path *
+ * component. We could initialize bestdist to some large *
+ * constant instead, and then compare to that constant here, *
+ * because an invariant is that we've never exceeded the *
+ * threshold for any component so far; but I think that looks *
+ * odd to the human reader, and we may make use of the total *
+ * distance for all corrections at some point in the future. */
+ if (bestdist < maxthresh) {
strcpy(new, spnameguess);
strcat(new, old);
return newname;
} else
return NULL;
- } else
- bestdist = thisdist;
+ } else {
+ maxthresh = bestdist + thresh;
+ bestdist += thisdist;
+ }
for (p = spnamebest; (*new = *p++);)
new++;
}
@@ -2487,6 +2533,7 @@
static int
spdist(char *s, char *t, int thresh)
{
+ /* TODO: Correction for non-ASCII and multibyte-input keyboards. */
char *p, *q;
const char qwertykeymap[] =
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\
@@ -2520,7 +2567,7 @@
if (!strcmp(s, t))
return 0;
-/* any number of upper/lower mistakes allowed (dist = 1) */
+ /* any number of upper/lower mistakes allowed (dist = 1) */
for (p = s, q = t; *p && tulower(*p) == tulower(*q); p++, q++);
if (!*p && !*q)
return 1;
@@ -2544,7 +2591,7 @@
int t0;
char *z;
- /* mistyped letter */
+ /* mistyped letter */
if (!(z = strchr(keymap, p[0])) || *z == '\n' || *z == '\t')
return spdist(p + 1, q + 1, thresh - 1) + 1;
Messages sorted by:
Reverse Date,
Date,
Thread,
Author