Zsh Mailing List Archive
Messages sorted by:
Reverse Date,
Date,
Thread,
Author
PATCH: zsh/regex and =~
- X-seq: zsh-workers 23340
- From: Phil Pennock <zsh-workers+phil.pennock@xxxxxxxxxxxx>
- To: zsh-workers@xxxxxxxxxx
- Subject: PATCH: zsh/regex and =~
- Date: Sat, 28 Apr 2007 00:56:35 -0700
- Domainkey-signature: a=rsa-sha1; q=dns; c=nofws; s=first1; d=spodhuis.org; h=Received:Date:From:To:Subject:Message-ID:Mail-Followup-To:MIME-Version:Content-Type:Content-Disposition; b=ToHmdRJQkjbOtJ68zXG5AaS6ksDd7G9THDJ/5jgXN4N89ZzGLNTZQ4GDRrEXUfOLL2p9LLzPEX0fU0g5EZCciReKdrWKB+QmmD0Wba8ISYVIdm4iYCa7etRRVPYfnA5BXDgoxCA7NvCyKLj7wc1OyHMdjhjanEwDG/oAX9UjSPU=;
- Mail-followup-to: zsh-workers@xxxxxxxxxx
- Mailing-list: contact zsh-workers-help@xxxxxxxxxx; run by ezmlm
[ Sorry for not having one diff which creates new files, but "cvs diff -N"
seems to be ignoring that little 'N' ]
The attached patch and files, which includes documentation, adds a new
loadable module, zsh/regex. I've not examined widechar issues and which
regex libraries actually do handle these. I've not looked at linkage
issues on platforms where regex (the POSIX interface, not regexp) is not
a part of libc.
This also includes my previous =~ work, replacing the previous patch.
I'm not sure that auto-unsetting REMATCH_PCRE is a good idea, so invite
comments; also as to which should be the default value; I suppose that
if pcre is not the default, then the warning can be put back in ...
My only test platform has been freebsd/amd64.
I've also cleaned up various memory leaks in zsh/pcre.
zsh/pcre now also sets $MATCH, not just $match.
I went with having $BASH_REMATCH be set instead of, rather than in
addition to, $MATCH and $match. I'm again very open to persuasion here.
Oh, and the copyright notice in regex.c seems a bit disjointed, with
multiple names. What's the copyright policy on newly contributed files?
zsh/regex provides the -regex-match conditional operator; the knowledge
of -regex-match and -pcre-match remains in cond.c with the COND_REGEX
handling for =~.
Also, I've decided that I much prefer the PCRE API to the POSIX regex
API. :-) I'm off to drink more wine to recover.
-Phil
Index: Doc/Zsh/cond.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/cond.yo,v
retrieving revision 1.3
diff -p -u -r1.3 cond.yo
--- Doc/Zsh/cond.yo 22 May 2000 15:01:35 -0000 1.3
+++ Doc/Zsh/cond.yo 28 Apr 2007 07:42:51 -0000
@@ -109,6 +109,11 @@ backward compatibility and should be con
item(var(string) tt(!=) var(pattern))(
true if var(string) does not match var(pattern).
)
+item(var(string) tt(=~) var(regexp))(
+true if var(string) matches the PCRE regular expression
+var(regexp). Requires the tt(zsh/pcre) module to be present,
+which is a compile-time option.
+)
item(var(string1) tt(<) var(string2))(
true if var(string1) comes before var(string2)
based on ASCII value of their characters.
Index: Doc/Zsh/mod_pcre.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/mod_pcre.yo,v
retrieving revision 1.5
diff -p -u -r1.5 mod_pcre.yo
--- Doc/Zsh/mod_pcre.yo 20 Jun 2004 22:47:18 -0000 1.5
+++ Doc/Zsh/mod_pcre.yo 28 Apr 2007 07:42:51 -0000
@@ -22,14 +22,17 @@ Studies the previously-compiled PCRE whi
matching.
)
findex(pcre_match)
-item(tt(pcre_match) [ tt(-a) var(arr) ] var(string))(
+item(tt(pcre_match) [ tt(-v) var(var) ] [ tt(-a) var(arr) ] var(string))(
Returns successfully if tt(string) matches the previously-compiled
PCRE.
If the expression captures substrings within parentheses,
tt(pcre_match) will set the array var($match) to those
substrings, unless the tt(-a) option is given, in which
-case it will set the array var(arr).
+case it will set the array var(arr). Similarly, the variable
+var($MATCH) will be set to the entire matched portion of the
+string, unless the tt(-v) option is given, in which case it will
+set the variable var(var).
)
enditem()
Index: Doc/Zsh/options.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/options.yo,v
retrieving revision 1.53
diff -p -u -r1.53 options.yo
--- Doc/Zsh/options.yo 5 Mar 2007 17:35:18 -0000 1.53
+++ Doc/Zsh/options.yo 28 Apr 2007 07:42:51 -0000
@@ -478,6 +478,19 @@ var(xx) is set to tt(LPAR())var(a b c)tt
`var(fooabar foobbar foocbar)' instead of the default
`var(fooa b cbar)'.
)
+pindex(REMATCH_PCRE)
+cindex(regexp, PCRE)
+cindex(PCRE, regexp)
+item(tt(REMATCH_PCRE) <Z>)(
+If set, regular expression matching with the tt(=~) operator will use
+Perl-Compatible Regular Expressions from the PCRE library, if available.
+If not set, regular expressions will use the extended regexp syntax
+provided by the system libraries.
+Experimental:
+When zsh is invoked as tt(zsh), this option is initially set, but may be
+unset if the tt(zsh/pcre) module can not be loaded. This behaviour, as
+well as the default status, is subject to change.
+)
pindex(SH_GLOB)
cindex(sh, globbing style)
cindex(globbing style, sh)
@@ -1131,6 +1144,20 @@ enditem()
subsect(Shell Emulation)
startitem()
+pindex(BASH_REMATCH)
+cindex(bash, BASH_REMATCH variable)
+cindex(regexp, bash BASH_REMATCH variable)
+item(tt(BASH_REMATCH))(
+When set, matches performed with the tt(=~) operator will set the
+tt(BASH_REMATCH) array variable, instead of the default tt(MATCH) and
+tt(match) variables. The first element of the tt(BASH_REMATCH) array
+will contain the entire matched text and subsequent elements will contain
+extracted substrings. This option makes more sense when tt(KSH_ARRAYS) is
+also set, so that the entire matched portion is stored at index 0 and the
+first substring is at index 1. Without this option, the tt(MATCH) variable
+contains the entire matched text and the tt(match) array variable will
+contain the substrings.
+)
pindex(BSD_ECHO)
cindex(echo, BSD compatible)
item(tt(BSD_ECHO) <S>)(
Index: Src/cond.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/cond.c,v
retrieving revision 1.8
diff -p -u -r1.8 cond.c
--- Src/cond.c 30 May 2006 22:35:03 -0000 1.8
+++ Src/cond.c 28 Apr 2007 07:42:51 -0000
@@ -34,7 +34,7 @@ int tracingcond;
static char *condstr[COND_MOD] = {
"!", "&&", "||", "==", "!=", "<", ">", "-nt", "-ot", "-ef", "-eq",
- "-ne", "-lt", "-gt", "-le", "-ge"
+ "-ne", "-lt", "-gt", "-le", "-ge", "=~"
};
/*
@@ -53,14 +53,14 @@ int
evalcond(Estate state, char *fromtest)
{
struct stat *st;
- char *left, *right;
+ char *left, *right, *overridename;
Wordcode pcode;
wordcode code;
int ctype, htok = 0, ret;
rec:
- left = right = NULL;
+ left = right = overridename = NULL;
pcode = state->pc++;
code = *pcode;
ctype = WC_COND_TYPE(code);
@@ -92,13 +92,42 @@ evalcond(Estate state, char *fromtest)
state->pc = pcode + (WC_COND_SKIP(code) + 1);
return ret;
}
+ case COND_REGEX:
+ {
+ int loaded = 0;
+ if (isset(REMATCHPCRE)) {
+ loaded = load_module_silence("zsh/pcre", 1);
+ if (loaded) {
+ overridename = "-pcre-match";
+ } else {
+ dosetopt(REMATCHPCRE, 0, 1);
+#if 0
+ zwarnnam(fromtest, "zsh/pcre not available for regex");
+ return 2;
+#endif
+ }
+ }
+ if (!loaded) {
+ loaded = load_module_silence("zsh/regex", 1);
+ if (loaded) {
+ overridename = "-regex-match";
+ } else {
+ zwarnnam(fromtest, "zsh/regex not available for regex");
+ return 2;
+ }
+ }
+ ctype = COND_MODI;
+ }
case COND_MOD:
case COND_MODI:
{
Conddef cd;
- char *name = ecgetstr(state, EC_NODUP, NULL), **strs;
+ char *name = overridename;
+ char **strs;
int l = WC_COND_SKIP(code);
+ if (name == NULL)
+ name = ecgetstr(state, EC_NODUP, NULL);
if (ctype == COND_MOD)
strs = ecgetarr(state, l, EC_DUP, NULL);
else {
@@ -139,7 +168,8 @@ evalcond(Estate state, char *fromtest)
return !cd->handler(strs, cd->condid);
} else {
zwarnnam(fromtest,
- "unrecognized condition: `%s'", name);
+ "unrecognized condition: `%s'",
+ name ? name : "<null>");
}
}
/* module not found, error */
Index: Src/options.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/options.c,v
retrieving revision 1.35
diff -p -u -r1.35 options.c
--- Src/options.c 15 Mar 2007 15:16:58 -0000 1.35
+++ Src/options.c 28 Apr 2007 07:42:51 -0000
@@ -88,6 +88,7 @@ static struct optname optns[] = {
{{NULL, "banghist", OPT_NONBOURNE}, BANGHIST},
{{NULL, "bareglobqual", OPT_EMULATE|OPT_ZSH}, BAREGLOBQUAL},
{{NULL, "bashautolist", 0}, BASHAUTOLIST},
+{{NULL, "bashrematch", 0}, BASHREMATCH},
{{NULL, "beep", OPT_ALL}, BEEP},
{{NULL, "bgnice", OPT_EMULATE|OPT_NONBOURNE},BGNICE},
{{NULL, "braceccl", OPT_EMULATE}, BRACECCL},
@@ -201,6 +202,7 @@ static struct optname optns[] = {
{{NULL, "rcquotes", OPT_EMULATE}, RCQUOTES},
{{NULL, "rcs", OPT_ALL}, RCS},
{{NULL, "recexact", 0}, RECEXACT},
+{{NULL, "rematchpcre", OPT_ZSH}, REMATCHPCRE},
{{NULL, "restricted", OPT_SPECIAL}, RESTRICTED},
{{NULL, "rmstarsilent", OPT_BOURNE}, RMSTARSILENT},
{{NULL, "rmstarwait", 0}, RMSTARWAIT},
Index: Src/parse.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/parse.c,v
retrieving revision 1.64
diff -p -u -r1.64 parse.c
--- Src/parse.c 23 Apr 2007 17:24:23 -0000 1.64
+++ Src/parse.c 28 Apr 2007 07:42:52 -0000
@@ -2124,6 +2124,12 @@ par_cond_triple(char *a, char *b, char *
ecstr(a);
ecstr(c);
ecadd(ecnpats++);
+ } else if ((b[0] == Equals || b[0] == '=') &&
+ (b[1] == '~' || b[1] == Tilde) && ~b[2]) {
+ ecadd(WCB_COND(COND_REGEX, 0));
+ ecstr(a);
+ ecstr(c);
+ ecadd(ecnpats++);
} else if (b[0] == '-') {
if ((t0 = get_cond_num(b + 1)) > -1) {
ecadd(WCB_COND(t0 + COND_NT, 0));
Index: Src/text.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/text.c,v
retrieving revision 1.19
diff -p -u -r1.19 text.c
--- Src/text.c 23 Apr 2007 15:24:00 -0000 1.19
+++ Src/text.c 28 Apr 2007 07:42:52 -0000
@@ -640,7 +640,7 @@ gettext2(Estate state)
{
static char *c1[] = {
"=", "!=", "<", ">", "-nt", "-ot", "-ef", "-eq",
- "-ne", "-lt", "-gt", "-le", "-ge"
+ "-ne", "-lt", "-gt", "-le", "-ge", "=~"
};
int ctype;
@@ -724,7 +724,7 @@ gettext2(Estate state)
}
break;
default:
- if (ctype <= COND_GE) {
+ if (ctype < COND_MOD) {
/* Binary test: `a = b' etc. */
taddstr(ecgetstr(state, EC_NODUP, NULL));
taddstr(" ");
Index: Src/zsh.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/zsh.h,v
retrieving revision 1.112
diff -p -u -r1.112 zsh.h
--- Src/zsh.h 29 Mar 2007 21:35:39 -0000 1.112
+++ Src/zsh.h 28 Apr 2007 07:42:53 -0000
@@ -519,8 +519,9 @@ struct timedfn {
#define COND_GT 13
#define COND_LE 14
#define COND_GE 15
-#define COND_MOD 16
-#define COND_MODI 17
+#define COND_REGEX 16
+#define COND_MOD 17
+#define COND_MODI 18
typedef int (*CondHandler) _((char **, int));
@@ -1588,6 +1589,7 @@ enum {
BANGHIST,
BAREGLOBQUAL,
BASHAUTOLIST,
+ BASHREMATCH,
BEEP,
BGNICE,
BRACECCL,
@@ -1695,6 +1697,7 @@ enum {
RCQUOTES,
RCS,
RECEXACT,
+ REMATCHPCRE,
RESTRICTED,
RMSTARSILENT,
RMSTARWAIT,
Index: Src/Modules/pcre.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Modules/pcre.c,v
retrieving revision 1.11
diff -p -u -r1.11 pcre.c
--- Src/Modules/pcre.c 5 Apr 2007 16:20:15 -0000 1.11
+++ Src/Modules/pcre.c 28 Apr 2007 07:42:53 -0000
@@ -3,7 +3,7 @@
*
* This file is part of zsh, the Z shell.
*
- * Copyright (c) 2001, 2002, 2003, 2004 Clint Adams
+ * Copyright (c) 2001, 2002, 2003, 2004, 2007 Clint Adams
* All rights reserved.
*
* Permission is hereby granted, without written agreement and without
@@ -42,6 +42,37 @@ static pcre_extra *pcre_hints;
/**/
static int
+zpcre_utf8_enabled(void)
+{
+#if defined(MULTIBYTE_SUPPORT) && defined(HAVE_NL_LANGINFO) && defined(CODESET)
+ static int have_utf8_pcre = -1;
+
+ /* value can toggle based on MULTIBYTE, so don't
+ * be too eager with caching */
+ if (have_utf8_pcre < -1)
+ return 0;
+
+ if (!isset(MULTIBYTE))
+ return 0;
+
+ if ((have_utf8_pcre == -1) &&
+ (!strcmp(nl_langinfo(CODESET), "UTF-8"))) {
+
+ if (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre))
+ have_utf8_pcre = -2; /* erk, failed to ask */
+ }
+
+ if (have_utf8_pcre < 0)
+ return 0;
+ return have_utf8_pcre;
+
+#else
+ return 0;
+#endif
+}
+
+/**/
+static int
bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func))
{
int pcre_opts = 0, pcre_errptr;
@@ -52,8 +83,14 @@ bin_pcre_compile(char *nam, char **args,
if(OPT_ISSET(ops,'m')) pcre_opts |= PCRE_MULTILINE;
if(OPT_ISSET(ops,'x')) pcre_opts |= PCRE_EXTENDED;
+ if (zpcre_utf8_enabled())
+ pcre_opts |= PCRE_UTF8;
+
pcre_hints = NULL; /* Is this necessary? */
+ if (pcre_pattern)
+ pcre_free(pcre_pattern);
+
pcre_pattern = pcre_compile(*args, pcre_opts, &pcre_error, &pcre_errptr, NULL);
if (pcre_pattern == NULL)
@@ -100,37 +137,52 @@ bin_pcre_study(char *nam, UNUSED(char **
/**/
static int
-zpcre_get_substrings(char *arg, int *ovec, int ret, char *receptacle)
+zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar, char *substravar, int matchedinarr)
{
- char **captures, **matches;
+ char **captures, **match_all, **matches;
+ int capture_start = 1;
- if(!pcre_get_substring_list(arg, ovec, ret, (const char ***)&captures)) {
-
- matches = zarrdup(&captures[1]); /* first one would be entire string */
- if (receptacle == NULL)
- setaparam("match", matches);
- else
- setaparam(receptacle, matches);
-
- pcre_free_substring_list((const char **)captures);
- }
+ if (matchedinarr)
+ capture_start = 0;
+ if (matchvar == NULL)
+ matchvar = "MATCH";
+ if (substravar == NULL)
+ substravar = "match";
+
+ /* captures[0] will be entire matched string, [1] first substring */
+ if(!pcre_get_substring_list(arg, ovec, ret, (const char ***)&captures)) {
+ match_all = ztrdup(captures[0]);
+ setsparam(matchvar, match_all);
+ matches = zarrdup(&captures[capture_start]);
+ setaparam(substravar, matches);
+ pcre_free_substring_list((const char **)captures);
+ }
- return 0;
+ return 0;
}
/**/
static int
bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
{
- int ret, capcount, *ovec, ovecsize;
+ int ret, capcount, *ovec, ovecsize, c;
+ char *matched_portion = NULL;
char *receptacle = NULL;
+ int return_value = 1;
+
+ if (pcre_pattern == NULL) {
+ zwarnnam(nam, "no pattern has been compiled");
+ return 1;
+ }
- if(OPT_ISSET(ops,'a')) {
- receptacle = *args++;
- if(!*args) {
- zwarnnam(nam, "not enough arguments");
- return 1;
- }
+ if(OPT_HASARG(ops,c='a')) {
+ receptacle = OPT_ARG(ops,c);
+ }
+ if(OPT_HASARG(ops,c='v')) {
+ matched_portion = OPT_ARG(ops,c);
+ }
+ if(!*args) {
+ zwarnnam(nam, "not enough arguments");
}
if ((ret = pcre_fullinfo(pcre_pattern, pcre_hints, PCRE_INFO_CAPTURECOUNT, &capcount)))
@@ -144,18 +196,20 @@ bin_pcre_match(char *nam, char **args, O
ret = pcre_exec(pcre_pattern, pcre_hints, *args, strlen(*args), 0, 0, ovec, ovecsize);
- if (ret==0) return 0;
- else if (ret==PCRE_ERROR_NOMATCH) return 1; /* no match */
+ if (ret==0) return_value = 0;
+ else if (ret==PCRE_ERROR_NOMATCH) /* no match */;
else if (ret>0) {
- zpcre_get_substrings(*args, ovec, ret, receptacle);
- return 0;
+ zpcre_get_substrings(*args, ovec, ret, matched_portion, receptacle, 0);
+ return_value = 0;
}
else {
zwarnnam(nam, "error in pcre_exec");
- return 1;
}
- return 1;
+ if (ovec)
+ zfree(ovec, ovecsize*sizeof(int));
+
+ return return_value;
}
/**/
@@ -164,33 +218,63 @@ cond_pcre_match(char **a, int id)
{
pcre *pcre_pat;
const char *pcre_err;
- char *lhstr, *rhre;
+ char *lhstr, *rhre, *avar=NULL;
int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize;
+ int return_value = 0;
+
+ if (zpcre_utf8_enabled())
+ pcre_opts |= PCRE_UTF8;
lhstr = cond_str(a,0,0);
rhre = cond_str(a,1,0);
+ pcre_pat = ov = NULL;
+
+ if (isset(BASHREMATCH))
+ avar="BASH_REMATCH";
switch(id) {
case CPCRE_PLAIN:
- pcre_pat = pcre_compile(rhre, pcre_opts, &pcre_err, &pcre_errptr, NULL);
- pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt);
- ovsize = (capcnt+1)*3;
- ov = zalloc(ovsize*sizeof(int));
- r = pcre_exec(pcre_pat, NULL, lhstr, strlen(lhstr), 0, 0, ov, ovsize);
- if (r==0) return 1;
+ pcre_pat = pcre_compile(rhre, pcre_opts, &pcre_err, &pcre_errptr, NULL);
+ if (pcre_pat == NULL) {
+ zwarn("failed to compile regexp /%s/: %s", rhre, pcre_err);
+ break;
+ }
+ pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt);
+ ovsize = (capcnt+1)*3;
+ ov = zalloc(ovsize*sizeof(int));
+ r = pcre_exec(pcre_pat, NULL, lhstr, strlen(lhstr), 0, 0, ov, ovsize);
+ /* r < 0 => error; r==0 match but not enough size in ov
+ * r > 0 => (r-1) substrings found; r==1 => no substrings
+ */
+ if (r==0) {
+ zwarn("reportable zsh problem: pcre_exec() returned 0");
+ return_value = 1;
+ break;
+ }
else if (r==PCRE_ERROR_NOMATCH) return 0; /* no match */
+ else if (r<0) {
+ zwarn("pcre_exec() error: %d", r);
+ break;
+ }
else if (r>0) {
- zpcre_get_substrings(lhstr, ov, r, NULL);
- return 1;
+ zpcre_get_substrings(lhstr, ov, r, NULL, avar, isset(BASHREMATCH));
+ return_value = 1;
+ break;
}
break;
}
- return 0;
+ if (pcre_pat)
+ pcre_free(pcre_pat);
+ if (ov)
+ zfree(ov, ovsize*sizeof(int));
+
+ return return_value;
}
static struct conddef cotab[] = {
CONDDEF("pcre-match", CONDF_INFIX, cond_pcre_match, 0, 0, CPCRE_PLAIN)
+ /* CONDDEF can register =~ but it won't be found */
};
/**/
@@ -206,7 +290,7 @@ static struct conddef cotab[] = {
static struct builtin bintab[] = {
BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimx", NULL),
BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL),
- BUILTIN("pcre_match", 0, bin_pcre_match, 1, 2, 0, "a", NULL)
+ BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "a:v:", NULL)
};
name=zsh/regex
link=dynamic
load=no
autobins=""
objects="regex.o"
/*
* regex.c
*
* This file is part of zsh, the Z shell.
*
* Copyright (c) 2007 Phil Pennock
* All Rights Reserved.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and to distribute modified versions of this software for any
* purpose, provided that the above copyright notice and the following
* two paragraphs appear in all copies of this software.
*
* In no event shall Clint Adams or the Zsh Development Group be liable
* to any party for direct, indirect, special, incidental, or consequential
* damages arising out of the use of this software and its documentation,
* even if Andrew Main and the Zsh Development Group have been advised of
* the possibility of such damage.
*
* Clint Adams and the Zsh Development Group specifically disclaim any
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose. The software
* provided hereunder is on an "as is" basis, and Andrew Main and the
* Zsh Development Group have no obligation to provide maintenance,
* support, updates, enhancements, or modifications.
*
*/
#include "regex.mdh"
#include "regex.pro"
#include <regex.h>
/* we default to a vaguely modern syntax and set of capabilities */
#define ZREGEX_EXTENDED 0
/* if you want Basic syntax, add it as an alternative option */
/*
 * Emit a zsh warning of the form "<msg>: <regex library error text>".
 *
 *   r    error code returned by regcomp() or regexec()
 *   re   the regex_t that was passed to the failing call
 *   msg  caller-supplied prefix describing what was being attempted
 *
 * The error text is fetched in two passes: the first regerror() call,
 * with no buffer, is used only for its return value (the size to
 * allocate); the second call fills the freshly allocated buffer.
 */
static void
zregex_regerrwarn(int r, regex_t *re, char *msg)
{
    size_t needed = regerror(r, re, NULL, 0);
    char *text = zalloc(needed * sizeof(char));

    regerror(r, re, text, needed);
    zwarn("%s: %s", msg, text);

    /* the buffer is only needed for the duration of the warning */
    zfree(text, needed);
}
/*
 * Condition handler for the infix regex-match test registered in cotab.
 *
 *   a[0]  subject string (left-hand side)
 *   a[1]  regular expression (right-hand side)
 *   id    condition id from the table; only ZREGEX_EXTENDED is handled
 *
 * Returns 1 if the expression compiled and matched, 0 otherwise
 * (no match, compile error, matching error, or unknown id).  Compile
 * and matching errors are reported through zregex_regerrwarn().
 *
 * On a successful match:
 *   - with the BASHREMATCH option set, the array BASH_REMATCH receives
 *     the whole matched text followed by each parenthesised substring;
 *   - otherwise the scalar MATCH receives the whole matched text and,
 *     if the expression has any parenthesised groups, the array match
 *     receives the substrings.
 * A group that did not participate in the match (offset -1) is stored
 * as an empty string.
 */

/**/
static int
zcond_regex_match(char **a, int id)
{
    regex_t re;
    regmatch_t *m, *matches = NULL;
    size_t matchessz = 0;
    char *lhstr, *rhre, *s, **arr = NULL, **x;
    int r, n, nsub, return_value, rcflags, reflags, nelem, start;
    int compiled = 0;		/* set once re holds a compiled pattern */

    lhstr = cond_str(a,0,0);
    rhre = cond_str(a,1,0);
    rcflags = reflags = 0;
    return_value = 0;		/* 1 => matched successfully */

    switch(id) {
    case ZREGEX_EXTENDED:
        rcflags |= REG_EXTENDED;
        r = regcomp(&re, rhre, rcflags);
        if (r) {
            /* re is not a valid compiled pattern here: it must not
             * be handed to regfree() below */
            zregex_regerrwarn(r, &re, "failed to compile regex");
            break;
        }
        compiled = 1;
        nsub = (int) re.re_nsub;

        /* re.re_nsub is number of parenthesized groups, we also need
         * 1 for the 0 offset, which is the entire matched portion
         */
        matchessz = (re.re_nsub + 1) * sizeof(regmatch_t);
        matches = zalloc(matchessz);
        r = regexec(&re, lhstr, re.re_nsub+1, matches, reflags);
        if (r == REG_NOMATCH)
            break;
        if (r != 0) {
            zregex_regerrwarn(r, &re, "regex matching error");
            break;
        }

        return_value = 1;
        if (isset(BASHREMATCH)) {
            /* element 0 of the array is the entire match */
            start = 0;
            nelem = nsub + 1;
        } else {
            /* entire match goes to MATCH, array holds substrings only */
            start = 1;
            nelem = nsub;
        }
        /* nelem strings + terminating NULL */
        if (nelem) {
            arr = x = (char **) zalloc(sizeof(char *) * (nelem + 1));
            for (m = matches + start, n = start; n <= nsub; ++n, ++m, ++x) {
                if (m->rm_so < 0) {
                    /* group did not participate: avoid forming a
                     * pointer before the start of lhstr */
                    *x = ztrduppfx(lhstr, 0);
                } else {
                    *x = ztrduppfx(lhstr + m->rm_so, m->rm_eo - m->rm_so);
                }
            }
            *x = NULL;
        }
        if (isset(BASHREMATCH)) {
            setaparam("BASH_REMATCH", arr);
        } else {
            m = matches;
            s = ztrduppfx(lhstr + m->rm_so, m->rm_eo - m->rm_so);
            setsparam("MATCH", s);
            if (nelem)
                setaparam("match", arr);
        }
        break;
    default:
        /* unknown condition id: nothing compiled, report no match */
        break;
    }

    if (matches)
        zfree(matches, matchessz);
    if (compiled)
        regfree(&re);
    return return_value;
}
/*
 * Condition definitions provided by this module: a single infix
 * condition named "regex-match", handled by zcond_regex_match() and
 * passed ZREGEX_EXTENDED as its condition id.  The table is handed to
 * addconddefs()/deleteconddefs() by boot_() and cleanup_().
 */
static struct conddef cotab[] = {
CONDDEF("regex-match", CONDF_INFIX, zcond_regex_match, 0, 0, ZREGEX_EXTENDED)
};
/*
 * setup_ entry point of the module.  This module has nothing to
 * prepare, so it does no work and returns 0.
 */
/**/
int
setup_(UNUSED(Module m))
{
return 0;
}
/*
 * boot_ entry point of the module: registers every entry of cotab
 * under the module's name via addconddefs().  The return value is the
 * logical negation of addconddefs()'s result.
 */
/**/
int
boot_(Module m)
{
    int ncond = (int) (sizeof(cotab) / sizeof(cotab[0]));

    if (addconddefs(m->nam, cotab, ncond))
        return 0;
    return 1;
}
/*
 * cleanup_ entry point of the module: removes the cotab entries that
 * boot_() registered under the module's name.  Always returns 0; the
 * result of deleteconddefs() is not inspected.
 */
/**/
int
cleanup_(Module m)
{
    int ncond = (int) (sizeof(cotab) / sizeof(cotab[0]));

    deleteconddefs(m->nam, cotab, ncond);
    return 0;
}
/*
 * finish_ entry point of the module.  Nothing is left to release at
 * this point, so it does no work and returns 0.
 */
/**/
int
finish_(UNUSED(Module m))
{
return 0;
}
Messages sorted by:
Reverse Date,
Date,
Thread,
Author