[Openais] [PATCH corosync trunk] IPC: gracefully handle running out of file descriptors.
Angus Salkeld
asalkeld at redhat.com
Sun Aug 8 18:28:44 PDT 2010
Whenever we accept a new connection or close an
existing one, check the number of available file
descriptors and either publish or withdraw the
IPC listening socket.
Signed-off-by: Angus Salkeld <asalkeld at redhat.com>
---
exec/coroipcs.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++---------
1 files changed, 87 insertions(+), 17 deletions(-)
diff --git a/exec/coroipcs.c b/exec/coroipcs.c
index aca9053..05b6a17 100644
--- a/exec/coroipcs.c
+++ b/exec/coroipcs.c
@@ -169,6 +169,14 @@ struct conn_info {
int poll_state;
};
+static int32_t coro_server_fd = -1;
+
+static void server_socket_publish(void);
+
+static void server_socket_withdraw(void);
+
+static void server_socket_check(void);
+
static int shared_mem_dispatch_bytes_left (const struct conn_info *conn_info);
static void outq_flush (struct conn_info *conn_info);
@@ -180,8 +188,6 @@ static void ipc_disconnect (struct conn_info *conn_info);
static void msg_send (void *conn, const struct iovec *iov, unsigned int iov_len,
int locked);
-static void _corosync_ipc_init(void);
-
#define log_printf(level, format, args...) \
do { \
if (api->log_printf) \
@@ -513,6 +519,7 @@ static inline int conn_info_destroy (struct conn_info *conn_info)
conn_info->state == CONN_STATE_DISCONNECT_INACTIVE) {
list_del (&conn_info->list);
close (conn_info->fd);
+ server_socket_check();
api->free (conn_info);
return (-1);
}
@@ -568,6 +575,7 @@ static inline int conn_info_destroy (struct conn_info *conn_info)
api->free (conn_info->private_data);
}
close (conn_info->fd);
+ server_socket_check();
res = circular_memory_unmap (conn_info->dispatch_buffer, conn_info->dispatch_size);
zcb_all_free (conn_info);
api->free (conn_info);
@@ -959,7 +967,7 @@ extern void coroipcs_ipc_init_v2 (
api->old_log_printf = NULL;
log_printf (LOGSYS_LEVEL_DEBUG, "you are using ipc api v2\n");
- _corosync_ipc_init ();
+ server_socket_publish();
}
extern void coroipcs_ipc_init (
@@ -997,34 +1005,69 @@ extern void coroipcs_ipc_init (
log_printf (LOGSYS_LEVEL_DEBUG, "you are using ipc api v1\n");
- _corosync_ipc_init ();
+ server_socket_publish();
}
-static void _corosync_ipc_init(void)
+/*
+ * The number of sockets actually in use is 12, but a larger number is
+ * reserved for safety.
+ */
+#define COROIPC_NUM_RESERVED_SOCKETS 25
+
+/*
+ * Estimate how many more client connections can be accepted.
+ *
+ * Starts from the soft RLIMIT_NOFILE limit and subtracts the number of
+ * entries on conn_info_list_head plus COROIPC_NUM_RESERVED_SOCKETS.
+ *
+ * Returns the remaining headroom (zero or negative when exhausted), or
+ * -1 if getrlimit() fails.
+ */
+static int32_t num_avail_sockets(void)
{
- int server_fd;
+ struct rlimit lim;
+ int32_t limit;
+ int32_t open_socks = 0;
+ int32_t res;
+ struct list_head *list;
+
+ if (getrlimit(RLIMIT_NOFILE, &lim) == -1) {
+ char error_str[100];
+ strerror_r(errno, error_str, 100);
+ log_printf(LOGSYS_LEVEL_ERROR,
+ "getrlimit: %s\n", error_str);
+ return -1;
+ }
+
+ for (list = conn_info_list_head.next; list != &conn_info_list_head;
+ list = list->next) {
+ open_socks++;
+ }
+
+ /*
+ * rlim_t can be wider than int32_t and may be RLIM_INFINITY; clamp it
+ * so the subtraction below cannot wrap around to a bogus negative
+ * value and cause the listening socket to be withdrawn needlessly.
+ */
+ if (lim.rlim_cur == RLIM_INFINITY || lim.rlim_cur > (rlim_t)INT32_MAX) {
+ limit = INT32_MAX;
+ } else {
+ limit = (int32_t)lim.rlim_cur;
+ }
+ res = limit - (open_socks + COROIPC_NUM_RESERVED_SOCKETS);
+ /* Log the clamped limit: it is an int32_t, so %d matches its type. */
+ log_printf(LOGSYS_LEVEL_DEBUG, "limit:%d - (open_socks:%d + reserved:%d) == %d\n",
+ limit, open_socks, COROIPC_NUM_RESERVED_SOCKETS, res);
+ return res;
+}
+
+static void server_socket_publish(void)
+{
+ int32_t res = 0;
struct sockaddr_un un_addr;
- int res;
+
+ log_printf (LOGSYS_LEVEL_WARNING,
+ "Publishing socket for client connections.\n");
/*
* Create socket for IPC clients, name socket, listen for connections
*/
#if defined(COROSYNC_SOLARIS)
- server_fd = socket (PF_UNIX, SOCK_STREAM, 0);
+ coro_server_fd = socket (PF_UNIX, SOCK_STREAM, 0);
#else
- server_fd = socket (PF_LOCAL, SOCK_STREAM, 0);
+ coro_server_fd = socket (PF_LOCAL, SOCK_STREAM, 0);
#endif
- if (server_fd == -1) {
+ if (coro_server_fd == -1) {
log_printf (LOGSYS_LEVEL_CRIT, "Cannot create client connections socket.\n");
api->fatal_error ("Can't create library listen socket");
}
- res = fcntl (server_fd, F_SETFL, O_NONBLOCK);
+ res = fcntl (coro_server_fd, F_SETFL, O_NONBLOCK);
if (res == -1) {
char error_str[100];
- strerror_r (errno, error_str, 100);
- log_printf (LOGSYS_LEVEL_CRIT, "Could not set non-blocking operation on server socket: %s\n", error_str);
- api->fatal_error ("Could not set non-blocking operation on server socket");
+ strerror_r(errno, error_str, 100);
+ log_printf(LOGSYS_LEVEL_CRIT,
+ "Could not set non-blocking operation on server socket: %s\n",
+ error_str);
+ api->fatal_error("Could not set non-blocking operation on server socket");
}
memset (&un_addr, 0, sizeof (struct sockaddr_un));
@@ -1048,7 +1091,7 @@ static void _corosync_ipc_init(void)
}
#endif
- res = bind (server_fd, (struct sockaddr *)&un_addr, COROSYNC_SUN_LEN(&un_addr));
+ res = bind (coro_server_fd, (struct sockaddr *)&un_addr, COROSYNC_SUN_LEN(&un_addr));
if (res) {
char error_str[100];
strerror_r (errno, error_str, 100);
@@ -1063,12 +1106,35 @@ static void _corosync_ipc_init(void)
#if !defined(COROSYNC_LINUX)
res = chmod (un_addr.sun_path, S_IRWXU|S_IRWXG|S_IRWXO);
#endif
- listen (server_fd, SERVER_BACKLOG);
+ listen (coro_server_fd, SERVER_BACKLOG);
/*
* Setup connection dispatch routine
*/
- api->poll_accept_add (server_fd);
+ api->poll_accept_add (coro_server_fd);
+}
+
+/*
+ * Stop accepting new IPC clients: hand the listening socket to
+ * api->poll_dispatch_destroy(), then shut it down and close it.
+ *
+ * Does nothing when the socket is already withdrawn, so callers that do
+ * not check coro_server_fd first (e.g. an accept() failure path) cannot
+ * pass -1 on to the poll layer, shutdown() or close().
+ */
+static void server_socket_withdraw(void)
+{
+ if (coro_server_fd == -1) {
+ return;
+ }
+
+ log_printf(LOGSYS_LEVEL_WARNING,
+ "Withdrawing socket for client connections.\n");
+
+ api->poll_dispatch_destroy(coro_server_fd, NULL);
+ shutdown(coro_server_fd, SHUT_RDWR);
+ close(coro_server_fd);
+ /* -1 marks the socket as withdrawn for server_socket_check(). */
+ coro_server_fd = -1;
+}
+
+/*
+ * Reconcile the listening socket with descriptor availability:
+ * publish it when it is withdrawn and num_avail_sockets() is positive,
+ * withdraw it when it is published and num_avail_sockets() is <= 0.
+ */
+static void server_socket_check(void)
+{
+ const int32_t avail = num_avail_sockets();
+ const int published = (coro_server_fd != -1);
+
+ if (!published && avail > 0) {
+ server_socket_publish();
+ } else if (published && avail <= 0) {
+ server_socket_withdraw();
+ }
}
void coroipcs_ipc_exit (void)
@@ -1447,6 +1513,9 @@ retry_accept:
strerror_r (errno, error_str, 100);
log_printf (LOGSYS_LEVEL_ERROR,
"Could not accept Library connection: %s\n", error_str);
+ if (errno == EMFILE || errno == ENFILE) {
+ server_socket_withdraw();
+ }
return (0); /* This is an error, but -1 would indicate disconnect from poll loop */
}
@@ -1476,6 +1545,7 @@ retry_accept:
if (res != 0) {
close (new_fd);
}
+ server_socket_check();
return (0);
}
--
1.7.1
More information about the Openais
mailing list