[Openais] [lck] patches segfault, lock_algo, orphan locks

Pascal Bouchareine pascal at gandi.net
Mon Sep 24 05:19:00 PDT 2007


Hi, 

Three patches attached:

The lock_algo patch queues lock requests rather than ignoring them
when an exclusive lock is held on a resource. Ignoring them left
processes stuck trying to acquire an exclusive lock forever.

The segfault patch does not use source.conn unless the originating
request is local. The current code tries to dereference pointers
created on another node.

The orphan_locks patch is based on a guess: with the "lock_fd/close"
hack used today, combined with lck_lib_exit_fn being called whenever
the fd is closed, the current code should lead to orphan locks.

This last one is tested, but I am still quite unsure about it.

Do they seem correct to you?

Thanks,
Pascal

-- 
\o/   Pascal Bouchareine - Gandi 
 g    0170393757           15, place de la Nation - 75011 Paris      
-------------- next part --------------
Index: lck.c
===================================================================
--- lck.c	(revision 1454)
+++ lck.c	(working copy)
@@ -1016,9 +1017,7 @@
 		/*
 		 * Exclusive lock granted
 		 */
-		if (resource_lock->lock_mode == SA_LCK_PR_LOCK_MODE) {
-			lock_queue (resource, resource_lock);
-		}
+		lock_queue (resource, resource_lock);
 	} else {
 		/*
 		 * Exclusive lock not granted
-------------- next part --------------
Index: lck.c
===================================================================
--- lck.c	(revision 1454)
+++ lck.c	(working copy)
@@ -731,7 +731,7 @@
 	struct resource *resource;
 	struct resource_cleanup *resource_cleanup;
 	SaAisErrorT error = SA_AIS_OK;
-	struct lck_pd *lck_pd = (struct lck_pd *)openais_conn_private_data_get (req_exec_lck_resourceopen->source.conn);
+	struct lck_pd *lck_pd;
 
 	log_printf (LOG_LEVEL_NOTICE, "EXEC request: saLckResourceOpen %s\n",
 		get_mar_name_t (&req_exec_lck_resourceopen->resource_name));
@@ -780,7 +780,8 @@
 		if (resource_cleanup == 0) {
 			free (resource);
 			error = SA_AIS_ERR_NO_MEMORY;
-		} else {
+		} else { 
+			lck_pd = (struct lck_pd *)openais_conn_private_data_get (req_exec_lck_resourceopen->source.conn);
 			list_init (&resource_cleanup->list);
 			list_init (&resource_cleanup->resource_lock_list_head);
 			resource_cleanup->resource = resource;
-------------- next part --------------
Index: lck.c
===================================================================
--- lck.c	(revision 1454)
+++ lck.c	(working copy)
@@ -843,8 +843,8 @@
 	SaAisErrorT error;
 	struct lckResourceInstance *lckResourceInstance;
 	struct lckLockIdInstance *lckLockIdInstance;
-	int lock_fd;
-	int dummy_fd;
+//	int lock_fd;
+//	int dummy_fd;
 
 	error = saHandleInstanceGet (&lckResourceHandleDatabase, lckResourceHandle,
 		(void *)&lckResourceInstance);
@@ -864,10 +864,10 @@
 		goto error_destroy;
 	}
 
-	error = saServiceConnect (&lock_fd, &dummy_fd, LCK_SERVICE);
-	if (error != SA_AIS_OK) { // TODO error handling
-		goto error_destroy;
-	}
+//	error = saServiceConnect (&lock_fd, &dummy_fd, LCK_SERVICE);
+//	if (error != SA_AIS_OK) { // TODO error handling
+//		goto error_destroy;
+//	}
 
 	lckLockIdInstance->response_mutex = lckResourceInstance->response_mutex;
 	lckLockIdInstance->response_fd = lckResourceInstance->response_fd;
@@ -892,14 +892,16 @@
 	/*
 	 * no mutex needed here since its a new connection
 	 */
-	error = saSendReceiveReply (lock_fd, 
+	pthread_mutex_lock(lckLockIdInstance->response_mutex);
+	error = saSendReceiveReply (lckLockIdInstance->response_fd,
 		&req_lib_lck_resourcelock,
 		sizeof (struct req_lib_lck_resourcelock),
 		&res_lib_lck_resourcelock,
 		sizeof (struct res_lib_lck_resourcelock));
+	pthread_mutex_unlock(lckLockIdInstance->response_mutex);
 
-	close (lock_fd);
-	close (dummy_fd);
+//	close (lock_fd);
+//	close (dummy_fd);
 
 	if (error == SA_AIS_OK) {
 		lckLockIdInstance->resource_lock = res_lib_lck_resourcelock.resource_lock;


More information about the Openais mailing list