Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

multibyte optimisations



On Thu, 10 Nov 2016 02:37:12 -0800
Sebastian Gniazdowski <psprint@xxxxxxxxxxxx> wrote:
> Other pointed functions seem to be very valid / expected – multibyte
> functions. They can be optimized if a courageous decision will be made –
> to do what charnext / pattern.c does:
> 
>     if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80))
>         return x + 1;
> 
> I.e. to optimize for ASCII as subset of UTF-8 also when calling
> MB_METACHARLEN, not only for MB_METASTRLEN (recent change).

These look straightforward and along the same lines as what we already
do.

pws

diff --git a/Src/utils.c b/Src/utils.c
index 3d535b8..cceaf4c 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -84,7 +84,15 @@ set_widearray(char *mb_array, Widechar_array wca)
 
 	mb_charinit();
 	while (*mb_array) {
-	    int mblen = mb_metacharlenconv(mb_array, &wci);
+	    int mblen;
+
+	    if (STOUC(*mb_array) <= 0x7f) {
+		mb_array++;
+		*wcptr++ = (wchar_t)*mb_array;
+		continue;
+	    }
+
+	    mblen = mb_metacharlenconv(mb_array, &wci);
 
 	    if (!mblen)
 		break;
@@ -5249,6 +5257,12 @@ mb_metacharlenconv_r(const char *s, wint_t *wcp, mbstate_t *mbsp)
     const char *ptr;
     wchar_t wc;
 
+    if (STOUC(*s) <= 0x7f) {
+	if (wcp)
+	    *wcp = (wint_t)*s;
+	return 1;
+    }
+
     for (ptr = s; *ptr; ) {
 	if (*ptr == Meta) {
 	    inchar = *++ptr ^ 32;
@@ -5301,7 +5315,7 @@ mb_metacharlenconv_r(const char *s, wint_t *wcp, mbstate_t *mbsp)
 mod_export int
 mb_metacharlenconv(const char *s, wint_t *wcp)
 {
-    if (!isset(MULTIBYTE)) {
+    if (!isset(MULTIBYTE) || STOUC(*s) <= 0x7f) {
 	/* treat as single byte, possibly metafied */
 	if (wcp)
 	    *wcp = (wint_t)(*s == Meta ? s[1] ^ 32 : *s);
@@ -5442,6 +5456,12 @@ mb_charlenconv_r(const char *s, int slen, wint_t *wcp, mbstate_t *mbsp)
     const char *ptr;
     wchar_t wc;
 
+    if (slen && STOUC(*s) <= 0x7f) {
+	if (wcp)
+	    *wcp = (wint_t)*s;
+	return 1;
+    }
+
     for (ptr = s; slen;  ) {
 	inchar = *ptr;
 	ptr++;
@@ -5477,7 +5497,7 @@ mb_charlenconv_r(const char *s, int slen, wint_t *wcp, mbstate_t *mbsp)
 mod_export int
 mb_charlenconv(const char *s, int slen, wint_t *wcp)
 {
-    if (!isset(MULTIBYTE)) {
+    if (!isset(MULTIBYTE) || STOUC(*s) <= 0x7f) {
 	if (wcp)
 	    *wcp = (wint_t)*s;
 	return 1;



Messages sorted by: Reverse Date, Date, Thread, Author