Zsh Mailing List Archive
Messages sorted by:
Reverse Date,
Date,
Thread,
Author
PATCH: print -S uses lexical history
- X-seq: zsh-workers 29413
- From: Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx>
- To: zsh-workers@xxxxxxx (Zsh hackers list)
- Subject: PATCH: print -S uses lexical history
- Date: Tue, 31 May 2011 21:37:17 +0100
- List-help: <mailto:zsh-workers-help@zsh.org>
- List-id: Zsh Workers List <zsh-workers.zsh.org>
- List-post: <mailto:zsh-workers@zsh.org>
- Mailing-list: contact zsh-workers-help@xxxxxxx; run by ezmlm
Since we've been through all the pain of adding the HIST_LEX_WORDS option
(which almost no one is using) to split a history line into words using
the lexical analyser, it's worth making this available directly via a
relative of "print -s", imaginatively called "print -S". This takes just a
single argument, which is split in the same way as if it had been read in
from a history file with HIST_LEX_WORDS set. This doesn't give you very
much beyond print -s with a parameter split using the (z) flag, but as the
code's already there it's worth having.
I've split the core code out into a new function, histsplitwords().
Index: Doc/Zsh/builtins.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/builtins.yo,v
retrieving revision 1.135
diff -p -u -r1.135 builtins.yo
--- Doc/Zsh/builtins.yo 19 May 2011 16:10:47 -0000 1.135
+++ Doc/Zsh/builtins.yo 31 May 2011 20:06:27 -0000
@@ -914,7 +914,7 @@ and the new directory stack is not print
tt(popd) that do not change the environment seen by an interactive user.
)
findex(print)
-xitem(tt(print) [ tt(-abcDilmnNoOpPrsz) ] [ tt(-u) var(n) ] [ tt(-f) var(format) ] [ tt(-C) var(cols) ])
+xitem(tt(print) [ tt(-abcDilmnNoOpPrsSz) ] [ tt(-u) var(n) ] [ tt(-f) var(format) ] [ tt(-C) var(cols) ])
item( [ tt(-R) [ tt(-en) ]] [ var(arg) ... ])(
With the `tt(-f)' option the arguments are printed as described by tt(printf).
With no flags or with the flag `tt(-)', the arguments are printed on
@@ -994,6 +994,14 @@ tt(-R); all other arguments and options
)
item(tt(-s))(
Place the results in the history list instead of on the standard output.
+Each argument to the tt(print) command is treated as a single word in the
+history, regardless of its content.
+)
+item(tt(-S))(
+Place the results in the history list instead of on the standard output.
+In this case only a single argument is allowed; it will be split into
+words by analysing the line as if it had been read from the history
+file with the tt(HIST_LEX_WORDS) option set.
)
item(tt(-u) var(n))(
Print the arguments to file descriptor var(n).
Index: Src/builtin.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/builtin.c,v
retrieving revision 1.251
diff -p -u -r1.251 builtin.c
--- Src/builtin.c 9 May 2011 09:49:09 -0000 1.251
+++ Src/builtin.c 31 May 2011 20:06:27 -0000
@@ -99,7 +99,7 @@ static struct builtin builtins[] =
#endif
BUILTIN("popd", BINF_SKIPINVALID | BINF_SKIPDASH | BINF_DASHDASHVALID, bin_cd, 0, 1, BIN_POPD, "q", NULL),
- BUILTIN("print", BINF_PRINTOPTS, bin_print, 0, -1, BIN_PRINT, "abcC:Df:ilmnNoOpPrRsu:z-", NULL),
+ BUILTIN("print", BINF_PRINTOPTS, bin_print, 0, -1, BIN_PRINT, "abcC:Df:ilmnNoOpPrRsSu:z-", NULL),
BUILTIN("printf", 0, bin_print, 1, -1, BIN_PRINTF, NULL, NULL),
BUILTIN("pushd", BINF_SKIPINVALID | BINF_SKIPDASH | BINF_DASHDASHVALID, bin_cd, 0, 2, BIN_PUSHD, "qsPL", NULL),
BUILTIN("pushln", 0, bin_print, 0, -1, BIN_PRINT, NULL, "-nz"),
@@ -3965,25 +3965,45 @@ bin_print(char *name, char **args, Optio
return 0;
}
/* -s option -- add the arguments to the history list */
- if (OPT_ISSET(ops,'s')) {
+ if (OPT_ISSET(ops,'s') || OPT_ISSET(ops,'S')) {
int nwords = 0, nlen, iwords;
char **pargs = args;
queue_signals();
- ent = prepnexthistent();
while (*pargs++)
nwords++;
- if ((ent->nwords = nwords)) {
- ent->words = (short *)zalloc(nwords*2*sizeof(short));
- nlen = iwords = 0;
- for (pargs = args; *pargs; pargs++) {
- ent->words[iwords++] = nlen;
- nlen += strlen(*pargs);
- ent->words[iwords++] = nlen;
- nlen++;
+ if (nwords) {
+ if (OPT_ISSET(ops,'S')) {
+ int wordsize;
+ short *words;
+ if (nwords > 1) {
+ zwarnnam(name, "option -S takes a single argument");
+ return 1;
+ }
+ words = NULL;
+ wordsize = 0;
+ histsplitwords(*args, &words, &wordsize, &nwords, 1);
+ ent = prepnexthistent();
+ ent->words = (short *)zalloc(nwords*sizeof(short));
+ memcpy(ent->words, words, nwords*sizeof(short));
+ free(words);
+ ent->nwords = nwords/2;
+ } else {
+ ent = prepnexthistent();
+ ent->words = (short *)zalloc(nwords*2*sizeof(short));
+ ent->nwords = nwords;
+ nlen = iwords = 0;
+ for (pargs = args; *pargs; pargs++) {
+ ent->words[iwords++] = nlen;
+ nlen += strlen(*pargs);
+ ent->words[iwords++] = nlen;
+ nlen++;
+ }
}
- } else
+ } else {
+ ent = prepnexthistent();
ent->words = (short *)NULL;
+ }
ent->node.nam = zjoin(args, ' ', 0);
ent->stim = ent->ftim = time(NULL);
ent->node.flags = 0;
Index: Src/hist.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/hist.c,v
retrieving revision 1.111
diff -p -u -r1.111 hist.c
--- Src/hist.c 27 May 2011 21:56:14 -0000 1.111
+++ Src/hist.c 31 May 2011 20:06:27 -0000
@@ -2338,110 +2338,11 @@ readhistfile(char *fn, int err, int read
/*
* Divide up the words.
*/
- nwordpos = 0;
start = pt;
uselex = isset(HISTLEXWORDS) && !(readflags & HFILE_FAST);
- if (uselex) {
- /*
- * Attempt to do this using the lexer.
- */
- LinkList wordlist = bufferwords(NULL, pt, NULL,
- LEXFLAGS_COMMENTS_KEEP);
- LinkNode wordnode;
- int nwords_max;
- nwords_max = 2 * countlinknodes(wordlist);
- if (nwords_max > nwords) {
- nwords = nwords_max;
- words = (short *)realloc(words, nwords*sizeof(short));
- }
- for (wordnode = firstnode(wordlist);
- wordnode;
- incnode(wordnode)) {
- char *word = getdata(wordnode);
-
- for (;;) {
- /*
- * Not really an oddity: "\\\n" is
- * removed from input as if whitespace.
- */
- if (inblank(*pt))
- pt++;
- else if (pt[0] == '\\' && pt[1] == '\n')
- pt += 2;
- else
- break;
- }
- if (!strpfx(word, pt)) {
- int bad = 0;
- /*
- * Oddity 1: newlines turn into semicolons.
- */
- if (!strcmp(word, ";"))
- continue;
- while (*pt) {
- if (!*word) {
- bad = 1;
- break;
- }
- /*
- * Oddity 2: !'s turn into |'s.
- */
- if (*pt == *word ||
- (*pt == '!' && *word == '|')) {
- pt++;
- word++;
- } else {
- bad = 1;
- break;
- }
- }
- if (bad) {
-#ifdef DEBUG
- dputs(ERRMSG("bad wordsplit reading history: "
- "%s\nat: %s\nword: %s"),
- start, pt, word);
-#endif
- pt = start;
- nwordpos = 0;
- uselex = 0;
- break;
- }
- } else if (!strcmp(word, ";") && strpfx(";;", pt)) {
- /*
- * Don't get confused between a semicolon that's
- * probably really a newline and a double
- * semicolon that's terminating a case.
- */
- continue;
- }
- words[nwordpos++] = pt - start;
- pt += strlen(word);
- words[nwordpos++] = pt - start;
- }
+ histsplitwords(pt, &words, &nwords, &nwordpos, uselex);
+ if (uselex)
freeheap();
- }
- if (!uselex) {
- do {
- for (;;) {
- if (inblank(*pt))
- pt++;
- else if (pt[0] == '\\' && pt[1] == '\n')
- pt += 2;
- else
- break;
- }
- if (*pt) {
- if (nwordpos >= nwords)
- words = (short *)
- realloc(words, (nwords += 64)*sizeof(short));
- words[nwordpos++] = pt - start;
- while (*pt && !inblank(*pt))
- pt++;
- words[nwordpos++] = pt - start;
- }
- } while (*pt);
-
- }
he->nwords = nwordpos/2;
if (he->nwords) {
@@ -3141,6 +3042,142 @@ bufferwords(LinkList list, char *buf, in
return list;
}
+/*
+ * Split up a line into words for use in a history entry.
+ *
+ * lineptr is the line to be split.
+ *
+ * *wordsp is an array of shorts and *nwordsp its allocated length.
+ * Each word takes two slots, the offsets of its start and end from
+ * the beginning of the line, so *nwordsp is twice the number of
+ * words the array can hold. If the array is too small (*nwordsp
+ * may be zero) it is reallocated and both values are updated.
+ *
+ * *nwordposp returns the number of slots of *wordsp actually used,
+ * i.e. twice the number of words found in the input line.
+ *
+ * If uselex is non-zero, attempt to do this using the lexical analyser.
+ * This is more accurate, but slower; for reading history files it's
+ * controlled by the option HISTLEXWORDS. If this fails (which
+ * indicates a bug in the shell) it falls back to whitespace-separated
+ * strings, printing a message if in debug mode.
+ *
+ * If uselex is zero, just look for whitespace-separated words; the only
+ * special handling is for a backslash-newline combination as used
+ * by the history file format to save multiline buffers.
+ */
+/**/
+mod_export void
+histsplitwords(char *lineptr, short **wordsp, int *nwordsp, int *nwordposp,
+ int uselex)
+{
+ int nwords = *nwordsp, nwordpos = 0;
+ short *words = *wordsp;
+ char *start = lineptr;
+
+ if (uselex) {
+ LinkList wordlist = bufferwords(NULL, lineptr, NULL,
+ LEXFLAGS_COMMENTS_KEEP);
+ LinkNode wordnode;
+ int nwords_max;
+
+ nwords_max = 2 * countlinknodes(wordlist);
+ if (nwords_max > nwords) {
+ *nwordsp = nwords = nwords_max;
+ *wordsp = words = (short *)zrealloc(words, nwords*sizeof(short));
+ }
+ for (wordnode = firstnode(wordlist);
+ wordnode;
+ incnode(wordnode)) {
+ char *word = getdata(wordnode);
+
+ for (;;) {
+ /*
+ * Not really an oddity: "\\\n" is
+ * removed from input as if whitespace.
+ */
+ if (inblank(*lineptr))
+ lineptr++;
+ else if (lineptr[0] == '\\' && lineptr[1] == '\n')
+ lineptr += 2;
+ else
+ break;
+ }
+ if (!strpfx(word, lineptr)) {
+ int bad = 0;
+ /*
+ * Oddity 1: newlines turn into semicolons.
+ */
+ if (!strcmp(word, ";"))
+ continue;
+ while (*lineptr) {
+ if (!*word) {
+ bad = 1;
+ break;
+ }
+ /*
+ * Oddity 2: !'s turn into |'s.
+ */
+ if (*lineptr == *word ||
+ (*lineptr == '!' && *word == '|')) {
+ lineptr++;
+ word++;
+ } else {
+ bad = 1;
+ break;
+ }
+ }
+ if (bad) {
+#ifdef DEBUG
+ dputs(ERRMSG("bad wordsplit reading history: "
+ "%s\nat: %s\nword: %s"),
+ start, lineptr, word);
+#endif
+ lineptr = start;
+ nwordpos = 0;
+ uselex = 0;
+ break;
+ }
+ } else if (!strcmp(word, ";") && strpfx(";;", lineptr)) {
+ /*
+ * Don't get confused between a semicolon that's
+ * probably really a newline and a double
+ * semicolon that's terminating a case.
+ */
+ continue;
+ }
+ words[nwordpos++] = lineptr - start;
+ lineptr += strlen(word);
+ words[nwordpos++] = lineptr - start;
+ }
+ }
+ if (!uselex) {
+ do {
+ for (;;) {
+ if (inblank(*lineptr))
+ lineptr++;
+ else if (lineptr[0] == '\\' && lineptr[1] == '\n')
+ lineptr += 2;
+ else
+ break;
+ }
+ if (*lineptr) {
+ if (nwordpos >= nwords) {
+ *nwordsp = nwords = nwords + 64;
+ *wordsp = words = (short *)
+ zrealloc(words, nwords*sizeof(*words));
+ }
+ words[nwordpos++] = lineptr - start;
+ while (*lineptr && !inblank(*lineptr))
+ lineptr++;
+ words[nwordpos++] = lineptr - start;
+ }
+ } while (*lineptr);
+ }
+
+ *nwordposp = nwordpos;
+}
+
/* Move the current history list out of the way and prepare a fresh history
* list using hf for HISTFILE, hs for HISTSIZE, and shs for SAVEHIST. If
* the hf value is an empty string, HISTFILE will be unset from the new
--
Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx>
Web page now at http://homepage.ntlworld.com/p.w.stephenson/
Messages sorted by:
Reverse Date,
Date,
Thread,
Author