[Openais] [PATCH openais whitetank] IPC: gracefully handle running out of file descriptors. (version
Angus Salkeld
asalkeld at redhat.com
Sun Aug 8 22:40:26 PDT 2010
On Sun, Aug 08, 2010 at 10:17:43PM -0700, Steven Dake wrote:
> The model is when server has too many sockets in use, library
> returns TRY_AGAIN?
Hi Steve
No, if the server runs out of fds then we shut down the listening
socket. The library returns a LIB error, I believe.
Then when we have more fds we set up the listening socket again.
Is there any point in returning TRY_AGAIN in this situation? I would say this
is not a "normal" error and probably indicates a machine that is set up incorrectly.
If processes start and quietly keep trying again, it might not help anyone.
-Angus
>
> Regards
> -steve
>
> On 08/08/2010 08:01 PM, Angus Salkeld wrote:
> > Whenever we accept a new connection or close an
> > existing one, check the number of available file
> > descriptors and either publish or withdraw the
> > IPC listening socket.
> >
> >Signed-off-by: Angus Salkeld<asalkeld at redhat.com>
> >---
> > exec/ipc.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
> > 1 files changed, 86 insertions(+), 6 deletions(-)
> >
> >diff --git a/exec/ipc.c b/exec/ipc.c
> >index 5337d25..bf3102c 100644
> >--- a/exec/ipc.c
> >+++ b/exec/ipc.c
> >@@ -99,6 +99,7 @@
> > #define MSG_SEND_UNLOCKED 1
> >
> > static unsigned int g_gid_valid = 0;
> >+static int32_t libais_server_fd = -1;
> >
> > static void (*ipc_serialize_lock_fn) (void);
> >
> >@@ -161,6 +162,15 @@ static int priv_change (struct conn_info *conn_info);
> >
> > static void ipc_disconnect (struct conn_info *conn_info);
> >
> >+static void server_socket_publish(void);
> >+
> >+static void server_socket_withdraw(void);
> >+
> >+static void server_socket_check(void);
> >+
> >+static int poll_handler_accept (poll_handle handle, int fd,
> >+ int revent, void *data);
> >+
> > static int ipc_thread_active (void *conn)
> > {
> > struct conn_info *conn_info = (struct conn_info *)conn;
> >@@ -211,6 +221,7 @@ static inline int conn_info_destroy (struct conn_info *conn_info)
> > conn_info->state == CONN_STATE_DISCONNECT_INACTIVE) {
> > list_del (&conn_info->list);
> > close (conn_info->fd);
> >+ server_socket_check();
> > free (conn_info);
> > return (-1);
> > }
> >@@ -257,6 +268,7 @@ static inline int conn_info_destroy (struct conn_info *conn_info)
> > free (conn_info->private_data);
> > }
> > close (conn_info->fd);
> >+ server_socket_check();
> > free (conn_info);
> > ipc_serialize_unlock_fn();
> > return (-1);
> >@@ -773,7 +785,12 @@ retry_accept:
> > }
> >
> > if (new_fd == -1) {
> >- log_printf (LOG_LEVEL_ERROR, "ERROR: Could not accept Library connection: %s\n", strerror (errno));
> >+ log_printf (LOG_LEVEL_ERROR,
> >+ "ERROR: Could not accept Library connection: %s\n",
> >+ strerror (errno));
> >+ if (errno == EMFILE || errno == ENFILE) {
> >+ server_socket_withdraw();
> >+ }
> > return (0); /* This is an error, but -1 would indicate disconnect from poll loop */
> > }
> >
> >@@ -802,6 +819,7 @@ retry_accept:
> > if (res != 0) {
> > close (new_fd);
> > }
> >+ server_socket_check();
> >
> > return (0);
> > }
> >@@ -835,14 +853,23 @@ void openais_ipc_init (
> > void (*serialize_lock_fn) (void),
> > void (*serialize_unlock_fn) (void))
> > {
> >- int libais_server_fd;
> >- struct sockaddr_un un_addr;
> >- int res;
> >-
> > ipc_serialize_lock_fn = serialize_lock_fn;
> >
> > ipc_serialize_unlock_fn = serialize_unlock_fn;
> >
> >+ server_socket_publish();
> >+
> >+ g_gid_valid = gid_valid;
> >+}
> >+
> >+static void server_socket_publish(void)
> >+{
> >+ int32_t res = 0;
> >+ struct sockaddr_un un_addr;
> >+
> >+ log_printf(LOG_LEVEL_WARNING,
> >+ "Publishing socket for client connections.\n");
> >+
> > /*
> > * Create socket for libais clients, name socket, listen for connections
> > */
> >@@ -885,8 +912,61 @@ void openais_ipc_init (
> > */
> > poll_dispatch_add (aisexec_poll_handle, libais_server_fd,
> > POLLIN|POLLNVAL, 0, poll_handler_accept);
> >+}
> >
> >- g_gid_valid = gid_valid;
> >+static void server_socket_withdraw(void)
> >+{
> >+ log_printf(LOG_LEVEL_WARNING,
> >+ "Withdrawing socket for client connections.\n");
> >+
> >+ poll_dispatch_delete(aisexec_poll_handle, libais_server_fd);
> >+ shutdown(libais_server_fd, SHUT_RDWR);
> >+ close(libais_server_fd);
> >+ libais_server_fd = -1;
> >+}
> >+
> >+/*
> >+ * The actual used sockets is 12 but allowing a larger number
> >+ * for safety.
> >+ */
> >+#define COROIPC_NUM_RESERVED_SOCKETS 25
> >+
> >+static int32_t num_avail_sockets(void)
> >+{
> >+ struct rlimit lim;
> >+ int32_t open_socks = 0;
> >+ int32_t res;
> >+ struct list_head *list;
> >+
> >+ if (getrlimit(RLIMIT_NOFILE,&lim) == -1) {
> >+ char error_str[100];
> >+ strerror_r(errno, error_str, 100);
> >+ log_printf(LOG_LEVEL_ERROR,
> >+ "getrlimit: %s\n", error_str);
> >+ return -1;
> >+ }
> >+
> >+ for (list = conn_info_list_head.next; list !=&conn_info_list_head;
> >+ list = list->next) {
> >+ open_socks++;
> >+ }
> >+ res = lim.rlim_cur - (open_socks + COROIPC_NUM_RESERVED_SOCKETS);
> >+ log_printf(LOG_LEVEL_DEBUG,
> >+ "(lim.rlim_cur:%lu - (open_socks:%d + reserved:%d) == %d\n",
> >+ lim.rlim_cur, open_socks, COROIPC_NUM_RESERVED_SOCKETS, res);
> >+ return res;
> >+}
> >+
> >+static void server_socket_check(void)
> >+{
> >+ int32_t num = num_avail_sockets();
> >+
> >+ if (libais_server_fd == -1&& num> 0) {
> >+ server_socket_publish();
> >+ }
> >+ else if (libais_server_fd != -1&& num<= 0) {
> >+ server_socket_withdraw();
> >+ }
> > }
> >
> > void openais_ipc_exit (void)
More information about the Openais
mailing list