Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: [PATCH] Enable sub-second timeout in zsystem flock



dana (Thursday 2020-03-12):
> 2. This particular test doesn't seem reliable on my machine. Within the test
>    harness, it normally takes about 0.078 seconds. Probably the fork over-head
>    (which is pretty high on macOS) is greater than the amount of time you're
>    giving it wait? If i change `zselect -t 1` to `zselect -t 10` it seems to
>    work better... but it still feels very brittle. Very much dependent on the
>    hardware, the OS, and the current resource utilisation

I see.  Here's a new version of the patch.  As long as the tests are
run sequentially and not in parallel, there shouldn't be any race
condition left.  Instead of just waiting 10 ms and hoping that the
sub-shell has had time to start in the background, I'm now actually
testing the presence of a file that it creates.

It might still fail if the background sub-shell completes, including
the several-tenths-of-a-second wait, before the next part of the test
is run.  Do you think it's still too likely?

I've also addressed the "else false" and the numbering change, and
added some comments to make it clearer.

					Best regards,
					Cedric Ware.


diff -ruN zsh-5.8.orig/Doc/Zsh/mod_system.yo zsh-5.8/Doc/Zsh/mod_system.yo
--- zsh-5.8.orig/Doc/Zsh/mod_system.yo	2020-01-13 06:39:15.000000000 +0100
+++ zsh-5.8/Doc/Zsh/mod_system.yo	2020-03-08 18:39:32.672683779 +0100
@@ -166,7 +166,7 @@
 printed in the last case, but the parameter tt(ERRNO) will reflect
 the error that occurred.
 )
-xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
+xitem(tt(zsystem flock) [ tt(-t) var(timeout) ] [ tt(-i) var(interval) ] [ tt(-f) var(var) ] [tt(-er)] var(file))
 item(tt(zsystem flock -u) var(fd_expr))(
 The builtin tt(zsystem)'s subcommand tt(flock) performs advisory file
 locking (via the manref(fcntl)(2) system call) over the entire contents
@@ -196,9 +196,16 @@
 
 By default the shell waits indefinitely for the lock to succeed.
 The option tt(-t) var(timeout) specifies a timeout for the lock in
-seconds; currently this must be an integer.  The shell will attempt
-to lock the file once a second during this period.  If the attempt
-times out, status 2 is returned.
+seconds; fractional seconds are allowed.  During this period, the
+shell will attempt to lock the file every var(interval) seconds
+if the tt(-i) var(interval) option is given, otherwise once a second.
+(This var(interval) is shortened before the last attempt if needed,
+so that the shell waits only until the var(timeout) and not longer.)
+If the attempt times out, status 2 is returned.
+
+(Note: var(interval) must be less than LONG_MAX microseconds.
+This is many millenia on 64-bit systems, but only about 35 minutes
+on 32-bit systems.)
 
 If the option tt(-e) is given, the file descriptor for the lock is
 preserved when the shell uses tt(exec) to start a new process;
diff -ruN zsh-5.8.orig/Src/Modules/system.c zsh-5.8/Src/Modules/system.c
--- zsh-5.8.orig/Src/Modules/system.c	2020-02-06 20:53:18.000000000 +0100
+++ zsh-5.8/Src/Modules/system.c	2020-03-08 18:43:02.756951315 +0100
@@ -532,6 +532,9 @@
 {
     int cloexec = 1, unlock = 0, readlock = 0;
     zlong timeout = -1;
+    double timeout_tmp;
+    long timeout_retry = 1e6;
+    mnumber timeout_param;
     char *fdvar = NULL;
 #ifdef HAVE_FCNTL_H
     struct flock lck;
@@ -583,7 +586,44 @@
 		} else {
 		    optarg = *args++;
 		}
-		timeout = mathevali(optarg);
+		timeout_param = matheval(optarg);
+		if (!(timeout_param.type & MN_FLOAT)) {
+		    timeout_param.type = MN_FLOAT;
+		    timeout_param.u.d = (double)timeout_param.u.l;
+		}
+		timeout_tmp = timeout_param.u.d * 1e6;
+		if ((timeout_tmp < 1) || (timeout_tmp > ZLONG_MAX / 2)) {
+		    zwarnnam(nam, "flock: invalid timeout value");
+		    return 1;
+		}
+		timeout = (zlong)timeout_tmp;
+		break;
+
+	    case 'i':
+		/* retry interval in seconds */
+		if (optptr[1]) {
+		    optarg = optptr + 1;
+		    optptr += strlen(optarg) - 1;
+		} else if (!*args) {
+		    zwarnnam(nam,
+			     "flock: option %c requires "
+			     "a numeric retry interval",
+			     opt);
+		    return 1;
+		} else {
+		    optarg = *args++;
+		}
+		timeout_param = matheval(optarg);
+		if (!(timeout_param.type & MN_FLOAT)) {
+		    timeout_param.type = MN_FLOAT;
+		    timeout_param.u.d = (double)timeout_param.u.l;
+		}
+		timeout_tmp = timeout_param.u.d * 1e6;
+		if ((timeout_tmp < 1) || (timeout_tmp > LONG_MAX)) {
+		    zwarnnam(nam, "flock: invalid interval value");
+		    return 1;
+		}
+		timeout_retry = (long)timeout_tmp;
 		break;
 
 	    case 'u':
@@ -647,7 +687,8 @@
     lck.l_len = 0;  /* lock the whole file */
 
     if (timeout > 0) {
-	time_t end = time(NULL) + (time_t)timeout;
+	zlong now;
+	zlong end = time_clock_us() + timeout;
 	while (fcntl(flock_fd, F_SETLK, &lck) < 0) {
 	    if (errflag) {
                 zclose(flock_fd);
@@ -658,11 +699,15 @@
 		zwarnnam(nam, "failed to lock file %s: %e", args[0], errno);
 		return 1;
 	    }
-	    if (time(NULL) >= end) {
+	    now = time_clock_us();
+	    if (now >= end) {
                 zclose(flock_fd);
 		return 2;
             }
-	    sleep(1);
+	    if (now + timeout_retry > end) {
+		timeout_retry = end - now;
+	    }
+	    zsleep(timeout_retry);
 	}
     } else {
 	while (fcntl(flock_fd, timeout == 0 ? F_SETLK : F_SETLKW, &lck) < 0) {
diff -ruN zsh-5.8.orig/Src/utils.c zsh-5.8/Src/utils.c
--- zsh-5.8.orig/Src/utils.c	2020-01-13 06:39:15.000000000 +0100
+++ zsh-5.8/Src/utils.c	2020-03-08 18:43:02.756951315 +0100
@@ -2745,6 +2745,26 @@
 }
 
 /*
+ * Return the current time in microseconds, using the system's
+ * monotonic clock if supported, the wall clock if not.
+ */
+
+/**/
+zlong
+time_clock_us(void)
+{
+#if defined(HAS_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return ts.tv_sec * (zlong)1e6 + ts.tv_nsec / 1000;
+#else
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return tv.tv_sec * (zlong)1e6 + tv.tv_usec;
+#endif
+}
+
+/*
  * Sleep for the given number of microseconds --- must be within
  * range of a long at the moment, but this is only used for
  * limited internal purposes.
diff -ruN zsh-5.8.orig/Test/V14system.ztst zsh-5.8/Test/V14system.ztst
--- zsh-5.8.orig/Test/V14system.ztst	1970-01-01 01:00:00.000000000 +0100
+++ zsh-5.8/Test/V14system.ztst	2020-03-14 21:59:02.351858164 +0100
@@ -0,0 +1,131 @@
+# Test zsh/system module
+
+%prep
+
+  if zmodload -s zsh/system && zmodload -s zsh/zselect; then
+    tst_dir=V13.tmp
+    mkdir -p -- $tst_dir
+  else
+    ZTST_unimplemented='the zsh/system and zsh/zselect modules are not available'
+  fi
+  : > $tst_dir/file # File on which to acquire flock.
+
+%test
+
+  (
+    zsystem flock -t 0.1 -i 0.000001 $tst_dir/file
+  )
+0:zsystem flock valid time arguments
+
+  (
+    zsystem flock -t -1      $tst_dir/file ||
+    zsystem flock -t 0.49e-6 $tst_dir/file ||
+    zsystem flock -t 1e100   $tst_dir/file ||
+    zsystem flock -i -1      $tst_dir/file ||
+    zsystem flock -i 0.49e-6 $tst_dir/file ||
+    zsystem flock -i 1e100   $tst_dir/file
+  )
+1:zsystem flock invalid time arguments
+?(eval):zsystem:2: flock: invalid timeout value
+?(eval):zsystem:3: flock: invalid timeout value
+?(eval):zsystem:4: flock: invalid timeout value
+?(eval):zsystem:5: flock: invalid interval value
+?(eval):zsystem:6: flock: invalid interval value
+?(eval):zsystem:7: flock: invalid interval value
+
+  (
+    # Lock file for 1 second in the background.
+    (zsystem flock $tst_dir/file \
+     && touch $tst_dir/locked \
+     && zselect -t 100
+     rm -f $tst_dir/locked) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $tst_dir/locked ]]; do
+      zselect -t 1
+    done
+    # Attempt to lock file with 0.5 second timeout: must fail.
+    zsystem flock -t 0.5 $tst_dir/file
+  )
+2:zsystem flock unsuccessful wait test
+
+  (
+    # Wait until sub-shell of the previous test has finished.
+    while [[ -f $tst_dir/locked ]]; do
+      zselect -t 10
+    done
+    # Lock file for 0.5 second in the background.
+    (zsystem flock $tst_dir/file \
+      && touch $tst_dir/locked \
+      && zselect -t 50
+      rm -f $tst_dir/locked) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $tst_dir/locked ]]; do
+      zselect -t 1
+    done
+    typeset -F SECONDS
+    start=$SECONDS
+    # Attempt to lock file without a timeout:
+    # must succeed after sub-shell above releases it (0.5 second).
+    if zsystem flock $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.3 && $elapsed -le 0.7 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 0.5 second" 1>&2
+      fi
+    fi
+  )
+0:zsystem flock successful wait test, no timeout
+?elapsed time seems OK
+
+  (
+    # Lock file for 0.5 second in the background.
+    (zsystem flock $tst_dir/file \
+      && touch $tst_dir/locked \
+      && zselect -t 50
+      rm -f $tst_dir/locked) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $tst_dir/locked ]]; do
+      zselect -t 1
+    done
+    typeset -F SECONDS
+    start=$SECONDS
+    # Attempt to lock file with 1-second timeout:
+    # must succeed 1 second after start because we retry every 1 second.
+    if zsystem flock -t 1 $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.8 && $elapsed -le 1.2 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 1 second" 1>&2
+      fi
+    fi
+  )
+0:zsystem flock successful wait test, integral seconds
+?elapsed time seems OK
+
+  (
+    # Lock file for 0.25 second in the background.
+    (zsystem flock $tst_dir/file \
+      && touch $tst_dir/locked \
+      && zselect -t 25
+      rm -f $tst_dir/locked) &
+    # Wait until sub-shell above has started.
+    while ! [[ -f $tst_dir/locked ]]; do
+      zselect -t 1
+    done
+    typeset -F SECONDS
+    start=$SECONDS
+    # Attempt to lock file with 0.4-second timeout, retrying every 0.1 second:
+    # must succeed 0.3 second after start.
+    if zsystem flock -t 0.4 -i 0.1 $tst_dir/file; then
+      elapsed=$[ $SECONDS - $start ]
+      if [[ $elapsed -ge 0.2 && $elapsed -le 0.5 ]]; then
+        echo "elapsed time seems OK" 1>&2
+      else
+        echo "elapsed time $elapsed should be ~ 0.3 second" 1>&2
+      fi
+    fi
+  )
+0:zsystem flock successful wait test, fractional seconds
+?elapsed time seems OK



Messages sorted by: Reverse Date, Date, Thread, Author