[Openais] [PATCH openais whitetank] IPC: gracefully handle running out of file descriptors. (version
Steven Dake
sdake at redhat.com
Sun Aug 8 22:17:43 PDT 2010
Is the model that, when the server has too many sockets in use, the library
returns TRY_AGAIN?
Regards
-steve
On 08/08/2010 08:01 PM, Angus Salkeld wrote:
> Whenever we accept a new connection or close an
> existing one, check the number of available file
> descriptors and either publish or withdraw the
> IPC listening socket.
>
> Signed-off-by: Angus Salkeld<asalkeld at redhat.com>
> ---
> exec/ipc.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
> 1 files changed, 86 insertions(+), 6 deletions(-)
>
> diff --git a/exec/ipc.c b/exec/ipc.c
> index 5337d25..bf3102c 100644
> --- a/exec/ipc.c
> +++ b/exec/ipc.c
> @@ -99,6 +99,7 @@
> #define MSG_SEND_UNLOCKED 1
>
> static unsigned int g_gid_valid = 0;
> +static int32_t libais_server_fd = -1;
>
> static void (*ipc_serialize_lock_fn) (void);
>
> @@ -161,6 +162,15 @@ static int priv_change (struct conn_info *conn_info);
>
> static void ipc_disconnect (struct conn_info *conn_info);
>
> +static void server_socket_publish(void);
> +
> +static void server_socket_withdraw(void);
> +
> +static void server_socket_check(void);
> +
> +static int poll_handler_accept (poll_handle handle, int fd,
> + int revent, void *data);
> +
> static int ipc_thread_active (void *conn)
> {
> struct conn_info *conn_info = (struct conn_info *)conn;
> @@ -211,6 +221,7 @@ static inline int conn_info_destroy (struct conn_info *conn_info)
> conn_info->state == CONN_STATE_DISCONNECT_INACTIVE) {
> list_del (&conn_info->list);
> close (conn_info->fd);
> + server_socket_check();
> free (conn_info);
> return (-1);
> }
> @@ -257,6 +268,7 @@ static inline int conn_info_destroy (struct conn_info *conn_info)
> free (conn_info->private_data);
> }
> close (conn_info->fd);
> + server_socket_check();
> free (conn_info);
> ipc_serialize_unlock_fn();
> return (-1);
> @@ -773,7 +785,12 @@ retry_accept:
> }
>
> if (new_fd == -1) {
> - log_printf (LOG_LEVEL_ERROR, "ERROR: Could not accept Library connection: %s\n", strerror (errno));
> + log_printf (LOG_LEVEL_ERROR,
> + "ERROR: Could not accept Library connection: %s\n",
> + strerror (errno));
> + if (errno == EMFILE || errno == ENFILE) {
> + server_socket_withdraw();
> + }
> return (0); /* This is an error, but -1 would indicate disconnect from poll loop */
> }
>
> @@ -802,6 +819,7 @@ retry_accept:
> if (res != 0) {
> close (new_fd);
> }
> + server_socket_check();
>
> return (0);
> }
> @@ -835,14 +853,23 @@ void openais_ipc_init (
> void (*serialize_lock_fn) (void),
> void (*serialize_unlock_fn) (void))
> {
> - int libais_server_fd;
> - struct sockaddr_un un_addr;
> - int res;
> -
> ipc_serialize_lock_fn = serialize_lock_fn;
>
> ipc_serialize_unlock_fn = serialize_unlock_fn;
>
> + server_socket_publish();
> +
> + g_gid_valid = gid_valid;
> +}
> +
> +static void server_socket_publish(void)
> +{
> + int32_t res = 0;
> + struct sockaddr_un un_addr;
> +
> + log_printf(LOG_LEVEL_WARNING,
> + "Publishing socket for client connections.\n");
> +
> /*
> * Create socket for libais clients, name socket, listen for connections
> */
> @@ -885,8 +912,61 @@ void openais_ipc_init (
> */
> poll_dispatch_add (aisexec_poll_handle, libais_server_fd,
> POLLIN|POLLNVAL, 0, poll_handler_accept);
> +}
>
> - g_gid_valid = gid_valid;
> +static void server_socket_withdraw(void)
> +{
> + log_printf(LOG_LEVEL_WARNING,
> + "Withdrawing socket for client connections.\n");
> +
> + poll_dispatch_delete(aisexec_poll_handle, libais_server_fd);
> + shutdown(libais_server_fd, SHUT_RDWR);
> + close(libais_server_fd);
> + libais_server_fd = -1;
> +}
> +
> +/*
> + * The actual used sockets is 12 but allowing a larger number
> + * for safety.
> + */
> +#define COROIPC_NUM_RESERVED_SOCKETS 25
> +
> +static int32_t num_avail_sockets(void)
> +{
> + struct rlimit lim;
> + int32_t open_socks = 0;
> + int32_t res;
> + struct list_head *list;
> +
> + if (getrlimit(RLIMIT_NOFILE,&lim) == -1) {
> + char error_str[100];
> + strerror_r(errno, error_str, 100);
> + log_printf(LOG_LEVEL_ERROR,
> + "getrlimit: %s\n", error_str);
> + return -1;
> + }
> +
> + for (list = conn_info_list_head.next; list !=&conn_info_list_head;
> + list = list->next) {
> + open_socks++;
> + }
> + res = lim.rlim_cur - (open_socks + COROIPC_NUM_RESERVED_SOCKETS);
> + log_printf(LOG_LEVEL_DEBUG,
> + "(lim.rlim_cur:%lu - (open_socks:%d + reserved:%d) == %d\n",
> + lim.rlim_cur, open_socks, COROIPC_NUM_RESERVED_SOCKETS, res);
> + return res;
> +}
> +
> +static void server_socket_check(void)
> +{
> + int32_t num = num_avail_sockets();
> +
> + if (libais_server_fd == -1&& num> 0) {
> + server_socket_publish();
> + }
> + else if (libais_server_fd != -1&& num<= 0) {
> + server_socket_withdraw();
> + }
> }
>
> void openais_ipc_exit (void)
More information about the Openais
mailing list