[Openais] [PATCH openais whitetank] IPC: gracefully handle running out of file descriptors. (version

Angus Salkeld asalkeld at redhat.com
Sun Aug 8 22:40:26 PDT 2010


On Sun, Aug 08, 2010 at 10:17:43PM -0700, Steven Dake wrote:
> The model is when server has too many sockets in use, library
> returns TRY_AGAIN?
Hi Steve

No, if the server runs out of fds then we shut down the listening
socket. The library returns a LIB error, I believe.

Then, when more fds become available, we set up the listening socket again.

Is there any point in trying again in this situation? I would say this
is not a "normal" error and probably indicates an incorrectly configured machine.
If processes start and quietly keep retrying, it might not help anyone.

-Angus

> 
> Regards
> -steve
> 
> On 08/08/2010 08:01 PM, Angus Salkeld wrote:
> >  Whenever we accept a new connection or close an
> >  existing one, check the number of available file
> >  descriptors and either publish or withdraw the
> >  IPC listening socket.
> >
> >Signed-off-by: Angus Salkeld<asalkeld at redhat.com>
> >---
> >  exec/ipc.c |   92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
> >  1 files changed, 86 insertions(+), 6 deletions(-)
> >
> >diff --git a/exec/ipc.c b/exec/ipc.c
> >index 5337d25..bf3102c 100644
> >--- a/exec/ipc.c
> >+++ b/exec/ipc.c
> >@@ -99,6 +99,7 @@
> >  #define MSG_SEND_UNLOCKED	1
> >
> >  static unsigned int g_gid_valid = 0;
> >+static int32_t libais_server_fd = -1;
> >
> >  static void (*ipc_serialize_lock_fn) (void);
> >
> >@@ -161,6 +162,15 @@ static int priv_change (struct conn_info *conn_info);
> >
> >  static void ipc_disconnect (struct conn_info *conn_info);
> >
> >+static void server_socket_publish(void);
> >+
> >+static void server_socket_withdraw(void);
> >+
> >+static void server_socket_check(void);
> >+
> >+static int poll_handler_accept (poll_handle handle, int fd,
> >+	int revent, void *data);
> >+
> >  static int ipc_thread_active (void *conn)
> >  {
> >  	struct conn_info *conn_info = (struct conn_info *)conn;
> >@@ -211,6 +221,7 @@ static inline int conn_info_destroy (struct conn_info *conn_info)
> >  		conn_info->state == CONN_STATE_DISCONNECT_INACTIVE) {
> >  		list_del (&conn_info->list);
> >  		close (conn_info->fd);
> >+		server_socket_check();
> >  		free (conn_info);
> >  		return (-1);
> >  	}
> >@@ -257,6 +268,7 @@ static inline int conn_info_destroy (struct conn_info *conn_info)
> >  		free (conn_info->private_data);
> >  	}
> >  	close (conn_info->fd);
> >+	server_socket_check();
> >  	free (conn_info);
> >  	ipc_serialize_unlock_fn();
> >  	return (-1);
> >@@ -773,7 +785,12 @@ retry_accept:
> >  	}
> >
> >  	if (new_fd == -1) {
> >-		log_printf (LOG_LEVEL_ERROR, "ERROR: Could not accept Library connection: %s\n", strerror (errno));
> >+		log_printf (LOG_LEVEL_ERROR,
> >+			"ERROR: Could not accept Library connection: %s\n",
> >+			strerror (errno));
> >+		if (errno == EMFILE || errno == ENFILE) {
> >+			server_socket_withdraw();
> >+		}
> >  		return (0); /* This is an error, but -1 would indicate disconnect from poll loop */
> >  	}
> >
> >@@ -802,6 +819,7 @@ retry_accept:
> >  	if (res != 0) {
> >  		close (new_fd);
> >  	}
> >+	server_socket_check();
> >
> >  	return (0);
> >  }
> >@@ -835,14 +853,23 @@ void openais_ipc_init (
> >  	void (*serialize_lock_fn) (void),
> >  	void (*serialize_unlock_fn) (void))
> >  {
> >-	int libais_server_fd;
> >-	struct sockaddr_un un_addr;
> >-	int res;
> >-
> >  	ipc_serialize_lock_fn = serialize_lock_fn;
> >
> >  	ipc_serialize_unlock_fn = serialize_unlock_fn;
> >
> >+	server_socket_publish();
> >+
> >+	g_gid_valid = gid_valid;
> >+}
> >+
> >+static void server_socket_publish(void)
> >+{
> >+	int32_t res = 0;
> >+	struct sockaddr_un un_addr;
> >+
> >+	log_printf(LOG_LEVEL_WARNING,
> >+		"Publishing socket for client connections.\n");
> >+
> >  	/*
> >  	 * Create socket for libais clients, name socket, listen for connections
> >  	 */
> >@@ -885,8 +912,61 @@ void openais_ipc_init (
> >           */
> >          poll_dispatch_add (aisexec_poll_handle, libais_server_fd,
> >                  POLLIN|POLLNVAL, 0, poll_handler_accept);
> >+}
> >
> >-	g_gid_valid = gid_valid;
> >+static void server_socket_withdraw(void)
> >+{
> >+	log_printf(LOG_LEVEL_WARNING,
> >+		"Withdrawing socket for client connections.\n");
> >+
> >+	poll_dispatch_delete(aisexec_poll_handle, libais_server_fd);
> >+	shutdown(libais_server_fd, SHUT_RDWR);
> >+	close(libais_server_fd);
> >+	libais_server_fd = -1;
> >+}
> >+
> >+/*
> >+ * The actual used sockets is 12 but allowing a larger number
> >+ * for safety.
> >+ */
> >+#define COROIPC_NUM_RESERVED_SOCKETS 25
> >+
> >+static int32_t num_avail_sockets(void)
> >+{
> >+	struct rlimit lim;
> >+	int32_t open_socks = 0;
> >+	int32_t res;
> >+	struct list_head *list;
> >+
> >+	if (getrlimit(RLIMIT_NOFILE,&lim) == -1) {
> >+		char error_str[100];
> >+		strerror_r(errno, error_str, 100);
> >+		log_printf(LOG_LEVEL_ERROR,
> >+			"getrlimit: %s\n", error_str);
> >+		return -1;
> >+	}
> >+
> >+	for (list = conn_info_list_head.next; list !=&conn_info_list_head;
> >+		list = list->next) {
> >+		open_socks++;
> >+	}
> >+	res = lim.rlim_cur - (open_socks + COROIPC_NUM_RESERVED_SOCKETS);
> >+	log_printf(LOG_LEVEL_DEBUG,
> >+		"(lim.rlim_cur:%lu - (open_socks:%d + reserved:%d) == %d\n",
> >+		lim.rlim_cur, open_socks, COROIPC_NUM_RESERVED_SOCKETS, res);
> >+	return res;
> >+}
> >+
> >+static void server_socket_check(void)
> >+{
> >+	int32_t num = num_avail_sockets();
> >+
> >+	if (libais_server_fd == -1&&  num>  0) {
> >+		server_socket_publish();
> >+	}
> >+	else if (libais_server_fd != -1&&  num<= 0) {
> >+		server_socket_withdraw();
> >+	}
> >  }
> >
> >  void openais_ipc_exit (void)


More information about the Openais mailing list