[PATCH 11/11] filelease2: Test C/R during lease-break-interval

Sukadev Bhattiprolu sukadev at linux.vnet.ibm.com
Fri Jan 29 12:44:21 PST 2010


From: Sukadev Bhattiprolu <sukadev at linux.vnet.ibm.com>
Date: Thu, 28 Jan 2010 23:18:50 -0800
Subject: [PATCH 11/11] filelease2: Test C/R during lease-break-interval

When a process P1 (the lease-owner of a file) is notified of an impending
lease-break (due to a conflicting open of the file by process P2, the
lease-breaker), have P1 "flush" some data to the file. Checkpoint processes
P1 and P2 before the kernel forcibly revokes the lease (i.e., checkpoint
within the lease-break-interval).

Upon restart, verify that P2's open() of the file completes properly
and that the data flushed by P1 is visible to P2.

NOTE:
	This test passes even without support for checkpoint of file-locks.
	The reason for this is that the kernel terminates the lease of P1
	before notifying P1 about the lease-break. Since we checkpoint while
	P1 is in the signal handler, neither P1 nor P2 has a lease and so the
	C/R passes. Hopefully the data-comparison checks will catch some
	errors during development. If not, we may need to redesign the test
	or drop it later.

Signed-off-by: Sukadev Bhattiprolu <sukadev at linux.vnet.ibm.com>
---
 fileio/Makefile          |    4 +-
 fileio/filelease2.c      |  400 ++++++++++++++++++++++++++++++++++++++++++++++
 fileio/run-filelease2.sh |    3 +
 3 files changed, 405 insertions(+), 2 deletions(-)
 create mode 100644 fileio/filelease2.c
 create mode 100755 fileio/run-filelease2.sh

diff --git a/fileio/Makefile b/fileio/Makefile
index bd28561..4cf401b 100644
--- a/fileio/Makefile
+++ b/fileio/Makefile
@@ -1,4 +1,4 @@
-targets = fileio1 filelock1 filelease1 fsetown1
+targets = fileio1 filelock1 fsetown1 filelease1 filelease2
 
 INCLUDE   = ../libcrtest
 LIBCRTEST = ../libcrtest/common.o
@@ -9,4 +9,4 @@ all: $(LIBCRTEST) $(targets)
 
 clean:
 	rm -f $(targets)
-	rm -rf cr_fileio* cr_filelock1* cr_filelease1* cr_fsetown1*
+	rm -rf cr_fileio* cr_filelock1* cr_filelease[12]* cr_fsetown1*
diff --git a/fileio/filelease2.c b/fileio/filelease2.c
new file mode 100644
index 0000000..1a53617
--- /dev/null
+++ b/fileio/filelease2.c
@@ -0,0 +1,400 @@
+#define _GNU_SOURCE	/* for F_SETLEASE/F_GETLEASE; must precede every #include */
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/wait.h>
+#include "libcrtest.h"
+
+#define TEST_FILE1	"data.d/data.filelease2"
+#define LOG_FILE	"logs.d/log.filelease2"
+
+int event_fd1;		/* notification handle from setup_notification() */
+
+static int test_fd;	/* lease holder's fd on TEST_FILE1 */
+static volatile sig_atomic_t got_sigio;	/* bumped by iohandler() */
+static volatile sig_atomic_t num_children;	/* also updated by child_handler() */
+static int pid1, pid2;	/* lease holder, lease breaker */
+char test_data[256];	/* pattern P1 writes and P2 expects to read back */
+
+/*
+ * Description:
+ * 	Ensure that processes checkpointed when they are in the middle
+ * 	of a lease-break, are restored correctly.
+ *
+ * Implementation:
+ * 	Process P1 takes an F_WRLCK lease on a file.
+ * 	Process P2 opens the file O_RDWR, which conflicts with the lease.
+ * 	Process P1 gets a SIGIO signal about the pending lease-break and
+ * 	writes some data to the file from its signal handler.
+ * 	Initiate a checkpoint while P1 is still in the signal handler.
+ * 	After checkpoint/restart, P1 releases the lease (F_UNLCK).
+ * 	Ensure P2's open() completes and P2 reads the data written by P1.
+ */
+
+/* Map an fcntl() lease type to a printable name for the log messages. */
+char *get_lease_desc(int type)
+{
+	if (type == F_RDLCK)
+		return "F_RDLCK";
+	if (type == F_WRLCK)
+		return "F_WRLCK";
+	return (type == F_UNLCK) ? "F_UNLCK" : "Unknown !";
+}
+
+/* Set a @type lease on @fd; on failure notify the parent (SIGUSR1) and exit. */
+void set_lease(int fd, int type)
+{
+	int err;
+
+	fprintf(logfp, "%d: set_lease() called for fd %d, type %s\n",
+			getpid(), fd, get_lease_desc(type));
+	if (fcntl(fd, F_SETLEASE, type) >= 0)
+		return;
+	err = errno;	/* save it: the stdio calls below may clobber errno */
+	fprintf(logfp, "%d: set_lease(type %d):, ERROR %s\n",
+			getpid(), type, strerror(err));
+	if (err == EINVAL)
+		fprintf(logfp, "%d: Maybe the fs does not support "
+				"F_SETLEASE (eg: NFS)\n", getpid());
+	fflush(logfp);
+	kill(getppid(), SIGUSR1);
+	do_exit(1);
+}
+
+/* Check that the lease currently held on @fd is @exp_type; exit(1) if not. */
+void test_lease(int fd, int exp_type)
+{
+	int cur = fcntl(fd, F_GETLEASE, 0);
+
+	if (cur < 0 || cur > 2) {
+		fprintf(logfp, "ERROR: fcntl(F_GETLEASE): expected %s, rc %d, "
+				"error %s\n", get_lease_desc(exp_type), cur,
+				strerror(errno));
+		do_exit(1);
+	}
+
+	if (cur == exp_type) {
+		fprintf(logfp, "%d: PASS: Expected %s, actual %s\n", getpid(),
+				get_lease_desc(exp_type), get_lease_desc(cur));
+		return;
+	}
+
+	fprintf(logfp, "%d: FAIL: Expected %s, actual %s\n", getpid(),
+			get_lease_desc(exp_type), get_lease_desc(cur));
+	do_exit(1);
+}
+
+/* Install @action as the SA_SIGINFO handler for @sig; exit(1) on failure. */
+void set_signal_action(int sig, void (*action)(int, siginfo_t *, void *))
+{
+	struct sigaction act;
+
+	memset(&act, 0, sizeof(act));	/* leave no field uninitialized */
+	act.sa_sigaction = action;
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = SA_SIGINFO;
+
+	if (sigaction(sig, &act, NULL) < 0) {
+		fprintf(logfp, "%d: sigaction() sig %d failed, error %s\n",
+				getpid(), sig, strerror(errno));
+		do_exit(1);
+	}
+}
+
+/*
+ * SIGIO handler of the lease holder (installed by do_child1()): flush data
+ * to the file, wait inside the handler for C/R to finish, drop the lease.
+ */
+static void iohandler(int sig, siginfo_t *info, void *arg)
+{
+	int rc;
+
+	got_sigio++;
+	fprintf(logfp, "%d: Got signal %d\n", getpid(), sig);
+	fflush(logfp);
+	
+	/* Before giving up the lease, write some data to the file */
+	rc = write(test_fd, test_data, sizeof(test_data));
+	if (rc != sizeof(test_data)) {
+		fprintf(logfp, "%d: write() failed, n %d, error %s\n", getpid(),
+				rc, strerror(errno));
+		do_exit(1);
+	}
+
+	set_checkpoint_ready();
+	fprintf(logfp, "***** %d: Ready for checkpoint\n", getpid());
+	fflush(logfp);
+
+	/* Wait for checkpoint/restart */
+	while(!test_done())
+		sleep(1);
+
+	fprintf(logfp, "%d: Test-done\n", getpid());
+	fflush(logfp);
+
+	/*
+	 * Checkpoint/restart is done. Ideally we would verify here that we
+	 * still hold the lease, and then terminate it.
+	 *
+	 * TODO: It looks like the lease is revoked even before the handler
+	 * 	 returns, so the test_lease() below fails. This behavior is
+	 * 	 not obvious from the description of F_SETLEASE in the man
+	 * 	 page. The test_lease() check is disabled for now (it does
+	 * 	 not affect C/R).
+	 */
+	/* test_lease(test_fd, F_WRLCK); */
+
+	set_lease(test_fd, F_UNLCK);
+
+	return;
+}
+
+/*
+ * Lease holder: take a write lease on the test file, report to the parent,
+ * then idle until iohandler() has run. Leaves via do_exit(), not by return.
+ */
+int do_child1(int idx)
+{
+	int lease = F_WRLCK;
+
+	fprintf(logfp, "%d: Setting lease to type %s\n", getpid(),
+			get_lease_desc(lease));
+	fflush(logfp);
+
+	set_signal_action(SIGIO, iohandler);
+
+	test_fd = open(TEST_FILE1, O_RDWR);
+	if (test_fd < 0) {
+		fprintf(logfp, "%d: open(%s) failed, error %s\n", getpid(),
+				TEST_FILE1, strerror(errno));
+		do_exit(1);
+	}
+	set_lease(test_fd, lease);
+
+	/* The lease is in place: let the parent carry on. */
+	notify_one_event(event_fd1);
+
+	while (got_sigio == 0)
+		sleep(1);
+
+	do_exit(0);
+}
+
+/*
+ * Lease breaker: open the test file O_RDWR, which conflicts with the write
+ * lease, then check that the data flushed by the lease holder is visible.
+ */
+int do_child2(int idx)
+{
+	int rc;
+	int fd;
+	char buf[sizeof(test_data)];
+
+	fprintf(logfp, "%d: Breaking lease by opening %s for write\n",
+			getpid(), TEST_FILE1);
+	fflush(logfp);
+
+	/* Tell parent we are (almost) ready for checkpoint. */
+	notify_one_event(event_fd1);
+
+	/*
+	 * To break the lease, open the file for write. This should block
+	 * until sibling drops the lease (after Checkpoint/restart is done).
+	 */
+	fd = open(TEST_FILE1, O_RDWR);
+	if (fd < 0) {
+		fprintf(logfp, "%d: open(%s) failed, error %s\n", getpid(),
+				TEST_FILE1, strerror(errno));
+		do_exit(1);
+	}
+
+	/*
+	 * If checkpoint is not done yet, then maybe the lease-break-interval
+	 * was not long enough for the wrapper scripts to complete checkpoint.
+	 * So fail the test.
+	 */
+	if (!test_checkpoint_done()) {
+		fprintf(logfp, "%d: Checkpoint not done yet ?\n", getpid());
+		do_exit(1);
+	}
+
+	rc = read(fd, buf, sizeof(buf));
+	if (rc != sizeof(buf)) {
+		fprintf(logfp, "%d: read() failed, rc %d, error %s\n",
+				getpid(), rc, strerror(errno));
+		do_exit(1);
+	}
+
+	if (memcmp(test_data, buf, sizeof(test_data))) {
+		fprintf(logfp, "%d: FAILED: Data miscompare !!!\n", getpid());
+		do_exit(1);
+	}
+
+	do_exit(0);
+}
+
+/* (Re)create the test file zero-filled and set test_data[] to all 1s. */
+void setup_test_data(void)
+{
+	int fd;
+	char zeroes[sizeof(test_data)] = { 0 };
+
+	if (unlink(TEST_FILE1) < 0 && errno != ENOENT) {
+		fprintf(logfp, "ERROR: unlink(%s): %s\n", TEST_FILE1,
+				strerror(errno));
+		do_exit(1);
+	}
+
+	fd = open(TEST_FILE1, O_RDWR|O_CREAT|O_TRUNC, 0666);
+	if (fd < 0) {
+		fprintf(logfp, "ERROR: open(%s): %s\n", TEST_FILE1,
+				strerror(errno));
+		do_exit(1);
+	}
+
+	/* Zero-fill: the reader miscompares unless the 1s really get written */
+	if (write(fd, zeroes, sizeof(zeroes)) != (ssize_t)sizeof(zeroes)) {
+		fprintf(logfp, "ERROR: write(%s): %s\n", TEST_FILE1,
+				strerror(errno));
+		do_exit(1);
+	}
+	close(fd);
+	memset(test_data, 1, sizeof(test_data));
+}
+
+void kill_children(int sig)	/* signal both children, then do_wait(2) */
+{
+	if (pid1 > 0)	/* a failed fork() leaves -1; kill(-1, ...) hits everyone */
+		kill(pid1, sig);
+	if (pid2 > 0)
+		kill(pid2, sig);
+	do_wait(2);
+}
+
+/* Fork a child running @child_func(@idx), wait for its event; returns pid. */
+int create_child(int idx, int (*child_func)(int))
+{
+	int rc;
+
+	rc = fork();
+	if (rc == 0)
+		(*child_func)(idx);	/* child functions end in do_exit() */
+
+	if (rc < 0) {
+		fprintf(logfp, "%d: fork() failed, error %s\n", getpid(),
+				strerror(errno));
+		kill_children(SIGKILL);
+		do_exit(1);	/* no child will ever notify us: don't hang below */
+	}
+
+	fprintf(logfp, "%d: Created child %d, pid %d\n", getpid(), idx, rc);
+	fflush(logfp);
+	num_children++;
+	wait_for_events(event_fd1, 1);
+	return rc;
+}
+
+/*
+ * Parent's SIGCHLD/SIGUSR1 handler: reap children and decide pass/fail.
+ */
+void child_handler(int sig, siginfo_t *info, void *arg)
+{
+	int rc;
+	int status;
+
+	fprintf(logfp, "%d: Got signal %d\n", getpid(), sig);
+	fflush(logfp);
+
+	if (sig == SIGUSR1)
+		goto failed;
+
+	while(num_children) {
+		rc = waitpid(-1, &status, WNOHANG);
+		if (rc < 0) {
+			fprintf(logfp, "%d: waitpid(): failed, rc %d, "
+					"error %s\n", getpid(), rc,
+					strerror(errno));
+			goto failed;
+		}
+		if (rc == 0)	/* nothing more to reap right now */
+			break;
+		if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
+			num_children--;
+		else {
+			/* rc is the pid just reaped; si_pid may name another child */
+			print_exit_status(rc, status);
+			goto failed;
+		}
+	}
+
+	if (!num_children) {
+		fprintf(logfp, "Both children exited cleanly, test passed\n");
+		do_exit(0);
+	}
+	return;
+
+failed:
+	kill_children(SIGKILL);
+	fprintf(logfp, "Test FAILED\n");
+	do_exit(1);
+}
+
+/* Parent: start both children; the test ends in child_handler(). */
+int main(int argc, char *argv[])
+{
+	int i;
+
+	if (test_done()) {
+		printf("Remove %s before running test\n", TEST_DONE);
+		do_exit(1);
+	}
+
+	logfp = fopen(LOG_FILE, "w");
+	if (!logfp) {
+		perror("open() logfile");
+		do_exit(1);
+	}
+
+	printf("%s: Closing stdio fds and writing messages to %s\n",
+			argv[0], LOG_FILE);
+
+	for (i=0; i<100; i++)  {
+		if (fileno(logfp) != i)
+			close(i);
+	}
+
+	setup_test_data();
+	event_fd1 = setup_notification();
+
+	/*
+	 * Before waiting for events below, ensure we will be notified
+	 * if a child encounters an error and/or exits prematurely.
+	 */
+	set_signal_action(SIGUSR1, child_handler);
+	set_signal_action(SIGCHLD, child_handler);
+
+	pid1 = create_child(0, do_child1);
+
+	pid2 = create_child(1, do_child2);
+
+	/*
+	 * NOTE: We have some guessing to do here. The notification from
+	 * 	 the second child (in create_child()) just tells us that
+	 * 	 the child is _about_ to attempt the lease. Give it extra
+	 * 	 time to actually block before enabling checkpoint.
+	 *
+	 * 	 And this extra time must be less than the lease-break-window
+	 * 	 (set by the test wrapper-script).
+	 */
+	sleep(10);
+
+	/*
+	 * Just wait for children to exit and exit from SIGCHLD handler.
+	 */
+	while(num_children)
+		pause();
+
+	do_exit(9); /* should not get here */
+}
diff --git a/fileio/run-filelease2.sh b/fileio/run-filelease2.sh
new file mode 100755
index 0000000..67de611
--- /dev/null
+++ b/fileio/run-filelease2.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+./run-fcntltests.sh filelease2
-- 
1.6.0.4



More information about the Containers mailing list