Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: Strange behavior of [[



pcre.c has the same problem:

% setopt re_match_pcre
% [[ $'\ua0' =~ . ]] && echo OK
(zsh hangs; 100% CPU usage)

The following patch applies to pcre.c the same fix that was made to regex.c
in workers/35448. I have also added a simple test to V07pcre.ztst.

diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c
index 2393cd1..aa5c8ed 100644
--- a/Src/Modules/pcre.c
+++ b/Src/Modules/pcre.c
@@ -190,18 +190,25 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
 	if (want_begin_end) {
 	    char *ptr = arg;
 	    zlong offs = 0;
+	    int clen, leftlen;
 
 	    /* Count the characters before the match */
-	    MB_METACHARINIT();
-	    while (ptr < arg + ovec[0]) {
+	    MB_CHARINIT();
+	    leftlen = ovec[0];
+	    while (leftlen) {
 		offs++;
-		ptr += MB_METACHARLEN(ptr);
+		clen = MB_CHARLEN(ptr, leftlen);
+		ptr += clen;
+		leftlen -= clen;
 	    }
 	    setiparam("MBEGIN", offs + !isset(KSHARRAYS));
 	    /* Add on the characters in the match */
-	    while (ptr < arg + ovec[1]) {
+	    leftlen = ovec[1] - ovec[0];
+	    while (leftlen) {
 		offs++;
-		ptr += MB_METACHARLEN(ptr);
+		clen = MB_CHARLEN(ptr, leftlen);
+		ptr += clen;
+		leftlen -= clen;
 	    }
 	    setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
 	    if (nelem) {
@@ -219,17 +226,23 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
 		    ptr = arg;
 		    offs = 0;
 		    /* Find the start offset */
-		    MB_METACHARINIT();
-		    while (ptr < arg + ipair[0]) {
+		    MB_CHARINIT();
+		    leftlen = ipair[0];
+		    while (leftlen) {
 			offs++;
-			ptr += MB_METACHARLEN(ptr);
+			clen = MB_CHARLEN(ptr, leftlen);
+			ptr += clen;
+			leftlen -= clen;
 		    }
 		    convbase(buf, offs + !isset(KSHARRAYS), 10);
 		    *bptr = ztrdup(buf);
 		    /* Continue to the end offset */
-		    while (ptr < arg + ipair[1]) {
+		    leftlen = ipair[1] - ipair[0];
+		    while (leftlen) {
 			offs++;
-			ptr += MB_METACHARLEN(ptr);
+			clen = MB_CHARLEN(ptr, leftlen);
+			ptr += clen;
+			leftlen -= clen;
 		    }
 		    convbase(buf, offs + !isset(KSHARRAYS) - 1, 10);
 		    *eptr = ztrdup(buf);
diff --git a/Test/V07pcre.ztst b/Test/V07pcre.ztst
index ddfd3f5..3907756 100644
--- a/Test/V07pcre.ztst
+++ b/Test/V07pcre.ztst
@@ -37,6 +37,17 @@
 >o→b
 >→
 
+  unset match mend
+  s=$'\u00a0'
+  [[ $s =~ '^.$' ]] && print OK
+  [[ A${s}B =~ .(.). && $match[1] == $s ]] && print OK
+  [[ A${s}${s}B =~ A([^[:ascii:]]*)B && $mend[1] == 3 ]] && print OK
+  unset s
+0:Raw IMETA characters in input string
+>OK
+>OK
+>OK
+
   [[ foo =~ f.+ ]] ; print $?
   [[ foo =~ x.+ ]] ; print $?
   [[ ! foo =~ f.+ ]] ; print $?




Messages sorted by: Reverse Date, Date, Thread, Author