Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Significant assignstrvalue optimization



Hello,
first thing – the dupstring_glen() that was in the code isn't needed –
the value allocated was only read, not written to in any place in the
code:

-           z = dupstring_glen(v->pm->gsu.s->getfn(v->pm), (unsigned*)
&zlen);
+            z = v->pm->gsu.s->getfn(v->pm);
+            zlen = strlen(z);

This gives significant speed up – although only for very large buffers –
attached testopt4bb.zsh:

    i=$(( 20000 ))
    while (( i -- )); do
        a+="aaaaaaaaaaaaaaaaaaaaaaaaa"
    done

runs for 953 ms with optimizations, 1576 ms without them. 

Second: the same that I did with setarrvalue – if we assign a[10]="a",
and 10-th element already exists, i.e. size of string doesn't change,
then instead of:

                x = (char *) zalloc(newsize + 1);
                strncpy(x, z, v->start);
                strcpy(x + v->start, val);
                strcat(x + v->start, z + v->end);
                v->pm->gsu.s->setfn(v->pm, x);

It is done:

                /* Size doesn't change, can limit actions to only
                 * overwriting bytes in already allocated string */
                strncpy(z + v->start, val, vlen);

Condition excludes special parameters:

            /* Does new size differ? */
            if ( newsize != zlen || v->pm->node.flags & PM_SPECIAL ) {

Test code:

a="exampletext"
repeat 12 a+="$a"

strtest() {
    repeat 60000; do
        a[10]="a"
    done
}

runs 366 ms vs 876 ms. The string has length 45056 elements, and there
are as much as 60k repetitions, so previous code was actually already
fast despite the extra zalloc, strncpy, strcat – which is little weird.
That said, the optimization can save one's day in certain situations.

Also added extensive tests of string assignments.

PS. The code again removes "+ 1" from new size computation. The comment
explains why:

            /* Characters preceding start index +
               characters of what is assigned +
               characters following end index */
            newsize = v->start + vlen + (zlen - v->end);

-- 
  Sebastian Gniazdowski
  psprint@xxxxxxxxxxxx
diff --git a/Src/params.c b/Src/params.c
index ef72cba..90f6fdf 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -2436,9 +2436,10 @@ assignstrvalue(Value v, char *val, int flags)
 		v->pm->width = strlen(val);
 	} else {
 	    char *z, *x;
-	    int zlen;
+            int zlen, vlen, newsize;
 
-	    z = dupstring_glen(v->pm->gsu.s->getfn(v->pm), (unsigned*) &zlen);
+            z = v->pm->gsu.s->getfn(v->pm);
+            zlen = strlen(z);
 
 	    if ((v->flags & VALFLAG_INV) && unset(KSHARRAYS))
 		v->start--, v->end--;
@@ -2469,12 +2470,26 @@ assignstrvalue(Value v, char *val, int flags)
 	    }
 	    else if (v->end > zlen)
 		v->end = zlen;
-	    x = (char *) zalloc(v->start + strlen(val) + zlen - v->end + 1);
-	    strncpy(x, z, v->start);
-	    strcpy(x + v->start, val);
-	    strcat(x + v->start, z + v->end);
-	    v->pm->gsu.s->setfn(v->pm, x);
-	    zsfree(val);
+
+            vlen = strlen(val);
+            /* Characters preceding start index +
+               characters of what is assigned +
+               characters following end index */
+            newsize = v->start + vlen + (zlen - v->end);
+
+            /* Does new size differ? */
+            if ( newsize != zlen || v->pm->node.flags & PM_SPECIAL ) {
+                x = (char *) zalloc(newsize + 1);
+                strncpy(x, z, v->start);
+                strcpy(x + v->start, val);
+                strcat(x + v->start, z + v->end);
+                v->pm->gsu.s->setfn(v->pm, x);
+            } else {
+                /* Size doesn't change, can limit actions to only
+                 * overwriting bytes in already allocated string */
+                strncpy(z + v->start, val, vlen);
+            }
+            zsfree(val);
 	}
 	break;
     case PM_INTEGER:
diff --git a/Test/A06assign.ztst b/Test/A06assign.ztst
index da4e3b0..bf39aee 100644
--- a/Test/A06assign.ztst
+++ b/Test/A06assign.ztst
@@ -489,3 +489,143 @@
  print $array
 0:slice beyond length of array
 >FIRST
+
+# tests of string assignments
+
+ a="abc"
+ a[1]=x
+ print $a
+0:overwrite first character in string
+>xbc
+
+ a="abc"
+ a[2]="x"
+ print $a
+0:overwrite middle character in string
+>axc
+
+ a="abc"
+ a[3]="x"
+ print $a
+0:overwrite last character in string
+>abx
+
+ a="abc"
+ a[-1]="x"
+ print $a
+0:overwrite -1 character in string
+>abx
+
+ a="abc"
+ a[-2]="x"
+ print $a
+0:overwrite -2 character (middle) in string
+>axc
+
+ a="ab"
+ a[-2]="x"
+ print $a
+0:overwrite -2 character (first) in string
+>xb
+
+ a="abc"
+ a[-3]="x"
+ print $a
+0:overwrite -3 character (first) in string
+>xbc
+
+ a="abc"
+ a[-4]="x"
+ print $a
+0:overwrite -4 character (before first) in string
+>xabc
+
+ a="abc"
+ a[-5]="x"
+ print $a
+0:overwrite -5 character (before-before first) in string
+>xabc
+
+ a="abc"
+ a[-4,0]="x"
+ print $a
+0:overwrite [-4,0] characters (before first) in string
+>xabc
+
+ a="abc"
+ a[-4,-4]="x"
+ print $a
+0:overwrite [-4,-4] character (before first) in string
+>xabc
+
+ a="abc"
+ a[-40,-30]="x"
+ print $a
+0:overwrite [-40,-30] characters (far before first) in string
+>xabc
+
+ a="abc"
+ a[-40,1]="x"
+ print $a
+0:overwrite [-40,1] characters in short string
+>xbc
+
+ a="abc"
+ a[-40,40]="x"
+ print $a
+0:overwrite [-40,40] characters in short string
+>x
+
+ a="abc"
+ a[2,40]="x"
+ print $a
+0:overwrite [2,40] characters in short string
+>ax
+
+ a="abc"
+ a[2,-1]="x"
+ print $a
+0:overwrite [2,-1] characters in short string
+>ax
+
+ a="abc"
+ a[-2,-1]="x"
+ print $a
+0:overwrite [-2,-1] characters in short string
+>ax
+
+ a="a"
+ a[-1]="xx"
+ print $a
+0:overwrite [-1] character with "xx"
+>xx
+
+ a="a"
+ a[-2]="xx"
+ print $a
+0:overwrite [-2] character (before first) with "xx"
+>xxa
+
+ a="a"
+ a[2]="xx"
+ print $a
+0:overwrite [2] character (after last) with "xx"
+>axx
+
+ a=""
+ a[1]="xx"
+ print $a
+0:overwrite [1] character (string: "") with "xx"
+>xx
+
+ a=""
+ a[-1]="xx"
+ print $a
+0:overwrite [-1] character (string: "") with "xx"
+>xx
+
+ a=""
+ a[2]="xx"
+ print $a
+0:overwrite [2] character (string: "") with "xx"
+>xx

Attachment: testopt4bb.zsh
Description: Binary data

Attachment: testopt8.zsh
Description: Binary data



Messages sorted by: Reverse Date, Date, Thread, Author