[Openais] [PATCH openais whitetank] IPC: gracefully handle running out of file descriptors. (version

Steven Dake sdake at redhat.com
Mon Aug 9 10:03:14 PDT 2010


On 08/08/2010 10:40 PM, Angus Salkeld wrote:
> On Sun, Aug 08, 2010 at 10:17:43PM -0700, Steven Dake wrote:
>> The model is when server has too many sockets in use, library
>> returns TRY_AGAIN?
> Hi Steve
>
> No, if the server runs out of fds then we shut down the listening
> socket. The library returns a LIB error, I believe.
>
> Then when we have more fds we set up the listening socket again.
>
> Is there any point in trying again in this situation? I would say this
> is not a "normal" error and probably shows a machine set up incorrectly.
> If processes start and quietly keep trying again it might not help anyone.
>
> -Angus
>

The check is done at IPC connection time, right?  In that case, my guess is
that to the library it would appear the server is not operating, and the
library would return "TRY_AGAIN".

In this case, the library should return CS_ERR_NO_RESOURCES, but I don't 
think that is possible.

I suppose for future versions of corosync we need to think a little more
clearly about the possible error conditions that every API can return:

1. server out of resources
2. client out of resources
3. server too busy
4. server rejected security
5. server not operational


>>
>> Regards
>> -steve
>>
>> On 08/08/2010 08:01 PM, Angus Salkeld wrote:
>>>   Whenever we accept a new connection or close an
>>>   existing one, check the number of available file
>>>   descriptors and either publish or withdraw the
>>>   IPC listening socket.
>>>
>>> Signed-off-by: Angus Salkeld<asalkeld at redhat.com>
>>> ---
>>>   exec/ipc.c |   92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
>>>   1 files changed, 86 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/exec/ipc.c b/exec/ipc.c
>>> index 5337d25..bf3102c 100644
>>> --- a/exec/ipc.c
>>> +++ b/exec/ipc.c
>>> @@ -99,6 +99,7 @@
>>>   #define MSG_SEND_UNLOCKED	1
>>>
>>>   static unsigned int g_gid_valid = 0;
>>> +static int32_t libais_server_fd = -1;
>>>
>>>   static void (*ipc_serialize_lock_fn) (void);
>>>
>>> @@ -161,6 +162,15 @@ static int priv_change (struct conn_info *conn_info);
>>>
>>>   static void ipc_disconnect (struct conn_info *conn_info);
>>>
>>> +static void server_socket_publish(void);
>>> +
>>> +static void server_socket_withdraw(void);
>>> +
>>> +static void server_socket_check(void);
>>> +
>>> +static int poll_handler_accept (poll_handle handle, int fd,
>>> +	int revent, void *data);
>>> +
>>>   static int ipc_thread_active (void *conn)
>>>   {
>>>   	struct conn_info *conn_info = (struct conn_info *)conn;
>>> @@ -211,6 +221,7 @@ static inline int conn_info_destroy (struct conn_info *conn_info)
>>>   		conn_info->state == CONN_STATE_DISCONNECT_INACTIVE) {
>>>   		list_del (&conn_info->list);
>>>   		close (conn_info->fd);
>>> +		server_socket_check();
>>>   		free (conn_info);
>>>   		return (-1);
>>>   	}
>>> @@ -257,6 +268,7 @@ static inline int conn_info_destroy (struct conn_info *conn_info)
>>>   		free (conn_info->private_data);
>>>   	}
>>>   	close (conn_info->fd);
>>> +	server_socket_check();
>>>   	free (conn_info);
>>>   	ipc_serialize_unlock_fn();
>>>   	return (-1);
>>> @@ -773,7 +785,12 @@ retry_accept:
>>>   	}
>>>
>>>   	if (new_fd == -1) {
>>> -		log_printf (LOG_LEVEL_ERROR, "ERROR: Could not accept Library connection: %s\n", strerror (errno));
>>> +		log_printf (LOG_LEVEL_ERROR,
>>> +			"ERROR: Could not accept Library connection: %s\n",
>>> +			strerror (errno));
>>> +		if (errno == EMFILE || errno == ENFILE) {
>>> +			server_socket_withdraw();
>>> +		}
>>>   		return (0); /* This is an error, but -1 would indicate disconnect from poll loop */
>>>   	}
>>>
>>> @@ -802,6 +819,7 @@ retry_accept:
>>>   	if (res != 0) {
>>>   		close (new_fd);
>>>   	}
>>> +	server_socket_check();
>>>
>>>   	return (0);
>>>   }
>>> @@ -835,14 +853,23 @@ void openais_ipc_init (
>>>   	void (*serialize_lock_fn) (void),
>>>   	void (*serialize_unlock_fn) (void))
>>>   {
>>> -	int libais_server_fd;
>>> -	struct sockaddr_un un_addr;
>>> -	int res;
>>> -
>>>   	ipc_serialize_lock_fn = serialize_lock_fn;
>>>
>>>   	ipc_serialize_unlock_fn = serialize_unlock_fn;
>>>
>>> +	server_socket_publish();
>>> +
>>> +	g_gid_valid = gid_valid;
>>> +}
>>> +
>>> +static void server_socket_publish(void)
>>> +{
>>> +	int32_t res = 0;
>>> +	struct sockaddr_un un_addr;
>>> +
>>> +	log_printf(LOG_LEVEL_WARNING,
>>> +		"Publishing socket for client connections.\n");
>>> +
>>>   	/*
>>>   	 * Create socket for libais clients, name socket, listen for connections
>>>   	 */
>>> @@ -885,8 +912,61 @@ void openais_ipc_init (
>>>            */
>>>           poll_dispatch_add (aisexec_poll_handle, libais_server_fd,
>>>                   POLLIN|POLLNVAL, 0, poll_handler_accept);
>>> +}
>>>
>>> -	g_gid_valid = gid_valid;
>>> +static void server_socket_withdraw(void)
>>> +{
>>> +	log_printf(LOG_LEVEL_WARNING,
>>> +		"Withdrawing socket for client connections.\n");
>>> +
>>> +	poll_dispatch_delete(aisexec_poll_handle, libais_server_fd);
>>> +	shutdown(libais_server_fd, SHUT_RDWR);
>>> +	close(libais_server_fd);
>>> +	libais_server_fd = -1;
>>> +}
>>> +
>>> +/*
>>> + * The actual used sockets is 12 but allowing a larger number
>>> + * for safety.
>>> + */
>>> +#define COROIPC_NUM_RESERVED_SOCKETS 25
>>> +
>>> +static int32_t num_avail_sockets(void)
>>> +{
>>> +	struct rlimit lim;
>>> +	int32_t open_socks = 0;
>>> +	int32_t res;
>>> +	struct list_head *list;
>>> +
>>> +	if (getrlimit(RLIMIT_NOFILE,&lim) == -1) {
>>> +		char error_str[100];
>>> +		strerror_r(errno, error_str, 100);
>>> +		log_printf(LOG_LEVEL_ERROR,
>>> +			"getrlimit: %s\n", error_str);
>>> +		return -1;
>>> +	}
>>> +
>>> +	for (list = conn_info_list_head.next; list !=&conn_info_list_head;
>>> +		list = list->next) {
>>> +		open_socks++;
>>> +	}
>>> +	res = lim.rlim_cur - (open_socks + COROIPC_NUM_RESERVED_SOCKETS);
>>> +	log_printf(LOG_LEVEL_DEBUG,
>>> +		"(lim.rlim_cur:%lu - (open_socks:%d + reserved:%d) == %d\n",
>>> +		lim.rlim_cur, open_socks, COROIPC_NUM_RESERVED_SOCKETS, res);
>>> +	return res;
>>> +}
>>> +
>>> +static void server_socket_check(void)
>>> +{
>>> +	int32_t num = num_avail_sockets();
>>> +
>>> +	if (libais_server_fd == -1&&   num>   0) {
>>> +		server_socket_publish();
>>> +	}
>>> +	else if (libais_server_fd != -1&&   num<= 0) {
>>> +		server_socket_withdraw();
>>> +	}
>>>   }
>>>
>>>   void openais_ipc_exit (void)
> _______________________________________________
> Openais mailing list
> Openais at lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/openais



More information about the Openais mailing list