[Openais] [PATCH openais whitetank] IPC: gracefully handle running out of file descriptors. (version

Steven Dake sdake at redhat.com
Sun Aug 8 22:17:43 PDT 2010


Is the model that when the server has too many sockets in use, the library
returns TRY_AGAIN?

Regards
-steve

On 08/08/2010 08:01 PM, Angus Salkeld wrote:
>   Whenever we accept a new connection or close an
>   existing one, check the number of available file
>   descriptors and either publish or withdraw the
>   IPC listening socket.
>
> Signed-off-by: Angus Salkeld<asalkeld at redhat.com>
> ---
>   exec/ipc.c |   92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
>   1 files changed, 86 insertions(+), 6 deletions(-)
>
> diff --git a/exec/ipc.c b/exec/ipc.c
> index 5337d25..bf3102c 100644
> --- a/exec/ipc.c
> +++ b/exec/ipc.c
> @@ -99,6 +99,7 @@
>   #define MSG_SEND_UNLOCKED	1
>
>   static unsigned int g_gid_valid = 0;
> +static int32_t libais_server_fd = -1;
>
>   static void (*ipc_serialize_lock_fn) (void);
>
> @@ -161,6 +162,15 @@ static int priv_change (struct conn_info *conn_info);
>
>   static void ipc_disconnect (struct conn_info *conn_info);
>
> +static void server_socket_publish(void);
> +
> +static void server_socket_withdraw(void);
> +
> +static void server_socket_check(void);
> +
> +static int poll_handler_accept (poll_handle handle, int fd,
> +	int revent, void *data);
> +
>   static int ipc_thread_active (void *conn)
>   {
>   	struct conn_info *conn_info = (struct conn_info *)conn;
> @@ -211,6 +221,7 @@ static inline int conn_info_destroy (struct conn_info *conn_info)
>   		conn_info->state == CONN_STATE_DISCONNECT_INACTIVE) {
>   		list_del (&conn_info->list);
>   		close (conn_info->fd);
> +		server_socket_check();
>   		free (conn_info);
>   		return (-1);
>   	}
> @@ -257,6 +268,7 @@ static inline int conn_info_destroy (struct conn_info *conn_info)
>   		free (conn_info->private_data);
>   	}
>   	close (conn_info->fd);
> +	server_socket_check();
>   	free (conn_info);
>   	ipc_serialize_unlock_fn();
>   	return (-1);
> @@ -773,7 +785,12 @@ retry_accept:
>   	}
>
>   	if (new_fd == -1) {
> -		log_printf (LOG_LEVEL_ERROR, "ERROR: Could not accept Library connection: %s\n", strerror (errno));
> +		log_printf (LOG_LEVEL_ERROR,
> +			"ERROR: Could not accept Library connection: %s\n",
> +			strerror (errno));
> +		if (errno == EMFILE || errno == ENFILE) {
> +			server_socket_withdraw();
> +		}
>   		return (0); /* This is an error, but -1 would indicate disconnect from poll loop */
>   	}
>
> @@ -802,6 +819,7 @@ retry_accept:
>   	if (res != 0) {
>   		close (new_fd);
>   	}
> +	server_socket_check();
>
>   	return (0);
>   }
> @@ -835,14 +853,23 @@ void openais_ipc_init (
>   	void (*serialize_lock_fn) (void),
>   	void (*serialize_unlock_fn) (void))
>   {
> -	int libais_server_fd;
> -	struct sockaddr_un un_addr;
> -	int res;
> -
>   	ipc_serialize_lock_fn = serialize_lock_fn;
>
>   	ipc_serialize_unlock_fn = serialize_unlock_fn;
>
> +	server_socket_publish();
> +
> +	g_gid_valid = gid_valid;
> +}
> +
> +static void server_socket_publish(void)
> +{
> +	int32_t res = 0;
> +	struct sockaddr_un un_addr;
> +
> +	log_printf(LOG_LEVEL_WARNING,
> +		"Publishing socket for client connections.\n");
> +
>   	/*
>   	 * Create socket for libais clients, name socket, listen for connections
>   	 */
> @@ -885,8 +912,61 @@ void openais_ipc_init (
>            */
>           poll_dispatch_add (aisexec_poll_handle, libais_server_fd,
>                   POLLIN|POLLNVAL, 0, poll_handler_accept);
> +}
>
> -	g_gid_valid = gid_valid;
> +static void server_socket_withdraw(void)
> +{
> +	log_printf(LOG_LEVEL_WARNING,
> +		"Withdrawing socket for client connections.\n");
> +
> +	poll_dispatch_delete(aisexec_poll_handle, libais_server_fd);
> +	shutdown(libais_server_fd, SHUT_RDWR);
> +	close(libais_server_fd);
> +	libais_server_fd = -1;
> +}
> +
> +/*
> + * The number of sockets actually in use is 12, but a larger number
> + * is reserved for safety.
> + */
> +#define COROIPC_NUM_RESERVED_SOCKETS 25
> +
> +static int32_t num_avail_sockets(void)
> +{
> +	struct rlimit lim;
> +	int32_t open_socks = 0;
> +	int32_t res;
> +	struct list_head *list;
> +
> +	if (getrlimit(RLIMIT_NOFILE,&lim) == -1) {
> +		char error_str[100];
> +		strerror_r(errno, error_str, 100);
> +		log_printf(LOG_LEVEL_ERROR,
> +			"getrlimit: %s\n", error_str);
> +		return -1;
> +	}
> +
> +	for (list = conn_info_list_head.next; list !=&conn_info_list_head;
> +		list = list->next) {
> +		open_socks++;
> +	}
> +	res = lim.rlim_cur - (open_socks + COROIPC_NUM_RESERVED_SOCKETS);
> +	log_printf(LOG_LEVEL_DEBUG,
> +		"(lim.rlim_cur:%lu - (open_socks:%d + reserved:%d) == %d\n",
> +		lim.rlim_cur, open_socks, COROIPC_NUM_RESERVED_SOCKETS, res);
> +	return res;
> +}
> +
> +static void server_socket_check(void)
> +{
> +	int32_t num = num_avail_sockets();
> +
> +	if (libais_server_fd == -1&&  num>  0) {
> +		server_socket_publish();
> +	}
> +	else if (libais_server_fd != -1&&  num<= 0) {
> +		server_socket_withdraw();
> +	}
>   }
>
>   void openais_ipc_exit (void)



More information about the Openais mailing list