[Openais] [PATCH 1/3] Stats-2 Add connection related statistics to the object db.

Steven Dake sdake at redhat.com
Fri Oct 9 10:39:32 PDT 2009


comments inline.  Great work, although needs a bit of rework for api
compat reasons.

On Thu, 2009-09-24 at 14:00 +1200, angus salkeld wrote:
> Hi
> 
> This is a re-work of the connection related stats, to remove the 
> dependence on objdb.
> 
> -Angus
> 
> Signed-off-by: Angus Salkeld <angus.salkeld at alliedtelesis.co.nz>
> ---
>  exec/coroipcs.c             |  110 ++++++++++++++++++++++++++++++++
>  exec/main.c                 |  147 ++++++++++++++++++++++++++++++++++++++++++-
>  include/corosync/coroipcs.h |    9 +++-
>  3 files changed, 264 insertions(+), 2 deletions(-)
> 
> diff --git a/exec/coroipcs.c b/exec/coroipcs.c
> index ab7be0b..b01696a 100644
> --- a/exec/coroipcs.c
> +++ b/exec/coroipcs.c
> @@ -72,6 +72,7 @@
>  #include <corosync/list.h>
>  
>  #include <corosync/coroipc_types.h>
> +#include <corosync/hdb.h>
>  #include <corosync/coroipcs.h>
>  #include <corosync/coroipc_ipc.h>
>  
> @@ -130,11 +131,13 @@ enum conn_state {
>  struct conn_info {
>  	int fd;
>  	pthread_t thread;
> +	pid_t client_pid;
>  	pthread_attr_t thread_attr;
>  	unsigned int service;
>  	enum conn_state state;
>  	int notify_flow_control_enabled;
>  	int refcount;
> +	hdb_handle_t stats_handle;
>  #if _POSIX_THREAD_PROCESS_SHARED < 1
>  	key_t semkey;
>  	int semid;
> @@ -180,6 +183,8 @@ static void sem_post_exit_thread (struct conn_info *conn_info)
>  retry_semop:
>  	res = sem_post (&conn_info->control_buffer->sem0);
>  	if (res == -1 && errno == EINTR) {
> +		api->stats_increment_value (conn_info->stats_handle,
> +									 "sem_retry_count");
>  		goto retry_semop;
>  	}
>  #else
> @@ -190,6 +195,8 @@ retry_semop:
>  retry_semop:
>  	res = semop (conn_info->semid, &sop, 1);
>  	if ((res == -1) && (errno == EINTR || errno == EAGAIN)) {
> +		api->stats_increment_value (conn_info->stats_handle,
> +									 "sem_retry_count");
>  		goto retry_semop;
>  	}
>  #endif
> @@ -437,6 +444,9 @@ static inline int conn_info_destroy (struct conn_info *conn_info)
>  	 * Retry library exit function if busy
>  	 */
>  	if (conn_info->state == CONN_STATE_THREAD_DESTROYED) {
> +
> +		api->stats_destroy_connection (conn_info->stats_handle);
> +
>  		res = api->exit_fn_get (conn_info->service) (conn_info);
>  		if (res == -1) {
>  			api->serialize_unlock ();
> @@ -588,6 +598,8 @@ retry_semwait:
>  			pthread_exit (0);
>  		}
>  		if ((res == -1) && (errno == EINTR)) {
> +			api->stats_increment_value (conn_info->stats_handle,
> +										"sem_retry_count");
>  			goto retry_semwait;
>  		}
>  #else
> @@ -602,6 +614,8 @@ retry_semop:
>  		}
>  		res = semop (conn_info->semid, &sop, 1);
>  		if ((res == -1) && (errno == EINTR || errno == EAGAIN)) {
> +			api->stats_increment_value (conn_info->stats_handle,
> +										"sem_retry_count");
>  			goto retry_semop;
>  		} else
>  		if ((res == -1) && (errno == EINVAL || errno == EIDRM)) {
> @@ -639,12 +653,17 @@ retry_semop:
>  		} else 
>  		if (send_ok) {
>  			api->serialize_lock();
> +
> +			api->stats_increment_value (conn_info->stats_handle,
> +										"requests");
>  			api->handler_fn_get (conn_info->service, header->id) (conn_info, header);
>  			api->serialize_unlock();
>  		} else {
>  			/*
>  			 * Overload, tell library to retry
>  			 */
> +			api->stats_increment_value (conn_info->stats_handle,
> +										"sem_retry_count");
>  			coroipc_response_header.size = sizeof (coroipc_response_header_t);
>  			coroipc_response_header.id = 0;
>  			coroipc_response_header.error = CS_ERR_TRY_AGAIN;
> @@ -672,9 +691,13 @@ req_setup_send (
>  retry_send:
>  	res = send (conn_info->fd, &res_setup, sizeof (mar_res_setup_t), MSG_WAITALL);
>  	if (res == -1 && errno == EINTR) {
> +		api->stats_increment_value (conn_info->stats_handle,
> +									"send_retry_count");
>  		goto retry_send;
>  	} else
>  	if (res == -1 && errno == EAGAIN) {
> +		api->stats_increment_value (conn_info->stats_handle,
> +									"send_retry_count");
>  		goto retry_send;
>  	}
>  	return (0);
> @@ -719,6 +742,8 @@ req_setup_recv (
>  retry_recv:
>  	res = recvmsg (conn_info->fd, &msg_recv, MSG_NOSIGNAL);
>  	if (res == -1 && errno == EINTR) {
> +		api->stats_increment_value (conn_info->stats_handle,
> +									"recv_retry_count");
>  		goto retry_recv;
>  	} else
>  	if (res == -1 && errno != EAGAIN) {
> @@ -785,6 +810,7 @@ retry_recv:
>  	assert (cmsg);
>  	cred = (struct ucred *)CMSG_DATA (cmsg);
>  	if (cred) {
> +		conn_info->client_pid = cred->pid;
>  		if (api->security_valid (cred->uid, cred->gid)) {
>  			authenticated = 1;
>  		}
> @@ -838,6 +864,7 @@ static int conn_info_create (int fd)
>  	memset (conn_info, 0, sizeof (struct conn_info));
>  
>  	conn_info->fd = fd;
> +	conn_info->client_pid = 0;
>  	conn_info->service = SOCKET_SERVICE_INIT;
>  	conn_info->state = CONN_STATE_THREAD_INACTIVE;
>  	list_init (&conn_info->outq_head);
> @@ -1000,12 +1027,16 @@ int coroipcs_response_send (void *conn, const void *msg, size_t mlen)
>  retry_semop:
>  	res = semop (conn_info->semid, &sop, 1);
>  	if ((res == -1) && (errno == EINTR || errno == EAGAIN)) {
> +		api->stats_increment_value (conn_info->stats_handle,
> +									 "sem_retry_count");
>  		goto retry_semop;
>  	} else
>  	if ((res == -1) && (errno == EINVAL || errno == EIDRM)) {
>  		return (0);
>  	}
>  #endif
> +	api->stats_increment_value (conn_info->stats_handle,
> +								"responses");
>  	return (0);
>  }
>  
> @@ -1038,12 +1069,16 @@ int coroipcs_response_iov_send (void *conn, const struct iovec *iov, unsigned in
>  retry_semop:
>  	res = semop (conn_info->semid, &sop, 1);
>  	if ((res == -1) && (errno == EINTR || errno == EAGAIN)) {
> +		api->stats_increment_value (conn_info->stats_handle,
> +									"sem_retry_count");
>  		goto retry_semop;
>  	} else
>  	if ((res == -1) && (errno == EINVAL || errno == EIDRM)) {
>  		return (0);
>  	}
>  #endif
> +	api->stats_increment_value (conn_info->stats_handle,
> +								"responses");
>  	return (0);
>  }
>  
> @@ -1115,12 +1150,16 @@ static void msg_send (void *conn, const struct iovec *iov, unsigned int iov_len,
>  retry_semop:
>  	res = semop (conn_info->semid, &sop, 1);
>  	if ((res == -1) && (errno == EINTR || errno == EAGAIN)) {
> +		api->stats_increment_value (conn_info->stats_handle,
> +									"sem_retry_count");
>  		goto retry_semop;
>  	} else
>  	if ((res == -1) && (errno == EINVAL || errno == EIDRM)) {
>  		return;
>  	}
>  #endif
> +	api->stats_increment_value (conn_info->stats_handle,
> +								"dispatched");
>  }
>  
>  static void outq_flush (struct conn_info *conn_info) {
> @@ -1173,9 +1212,13 @@ retry_recv:
>  		sizeof (mar_req_priv_change),
>  		MSG_NOSIGNAL);
>  	if (res == -1 && errno == EINTR) {
> +		api->stats_increment_value (conn_info->stats_handle,
> +									"recv_retry_count");
>  		goto retry_recv;
>  	}
>  	if (res == -1 && errno == EAGAIN) {
> +		api->stats_increment_value (conn_info->stats_handle,
> +									"recv_retry_count");
>  		goto retry_recv;
>  	}
>  	if (res == -1 && errno != EAGAIN) {
> @@ -1346,6 +1389,70 @@ retry_accept:
>  	return (0);
>  }
>  
> +static char * pid_to_name (pid_t pid, char* out_name, size_t name_len)
> +{
> +	char *name;
> +	char *rest;
> +	FILE *fp;
> +	char fname[32];
> +	char buf[256];
> +
> +	snprintf (fname, 32, "/proc/%d/stat", pid);
> +	fp = fopen (fname, "r");
> +	if (!fp) {
> +		return NULL;
> +	}
> +
> +	if (fgets (buf, sizeof (buf), fp) == NULL) {
> +		fclose (fp);
> +		return NULL;
> +	}
> +	fclose (fp);
> +
> +	name = strrchr (buf, '(');
> +	if (!name) {
> +		return NULL;
> +	}
> +
> +	/* move past the bracket */
> +	name++;
> +
> +	rest = strrchr (buf, ')');
> +
> +	if (rest == NULL || rest[1] != ' ') {
> +		return NULL;
> +	}
> +
> +	*rest = '\0';
> +	/* move past the NULL and space */
> +	rest += 2;
> +
> +	/* copy the name */
> +	strncpy (out_name, name, name_len);
> +	out_name[name_len] = '\0';
> +	return out_name;
> +}
> +

yikes.  I looked for a posix api to produce this information but
couldn't find one...  How does this work on OS without proc?


> +static void coroipcs_init_conn_stats (
> +	struct conn_info * conn)
> +{
> +	char conn_name[42];
> +	char proc_name[32];
> +	int res;
> +
> +	if (conn->client_pid > 0) {
> +		if (pid_to_name (conn->client_pid, proc_name, sizeof(proc_name)))
> +			snprintf (conn_name, sizeof(conn_name), "%s:%d:%d", proc_name, conn->client_pid, conn->fd);
> +		else
> +			snprintf (conn_name, sizeof(conn_name), "%d:%d", conn->client_pid, conn->fd);
> +	} else
> +		snprintf (conn_name, sizeof(conn_name), "%d", conn->fd);
> +
> +	conn->stats_handle = api->stats_create_connection (conn_name, conn->client_pid, conn->fd);
> +	api->stats_update_value (conn->stats_handle, "service_id",
> +							 &conn->service, sizeof(conn->service));
> +}
> +
>  int coroipcs_handler_dispatch (
>  	int fd,
>  	int revent,
> @@ -1447,6 +1554,9 @@ int coroipcs_handler_dispatch (
>  
>  		api->init_fn_get (conn_info->service) (conn_info);
>  
> +		/* create stats objects */
> +		coroipcs_init_conn_stats (conn_info);
> +
>  		pthread_attr_init (&conn_info->thread_attr);
>  		/*
>  		* IA64 needs more stack space then other arches
> diff --git a/exec/main.c b/exec/main.c
> index a85aa55..82ab44d 100644
> --- a/exec/main.c
> +++ b/exec/main.c
> @@ -129,6 +129,8 @@ static hdb_handle_t corosync_poll_handle;
>  
>  struct sched_param global_sched_param;
>  
> +static hdb_handle_t object_connection_handle;
> +
>  hdb_handle_t corosync_poll_handle_get (void)
>  {
>  	return (corosync_poll_handle);
> @@ -461,6 +463,7 @@ static void deliver_fn (
>  			((void *)msg);
>  	}
>  
> +

random cr

>  	ais_service[service]->exec_engine[fn_id].exec_handler_fn
>  		(msg, nodeid);
>  
> @@ -669,6 +672,105 @@ static void corosync_poll_dispatch_modify (
>  		corosync_poll_handler_dispatch);
>  }
>  
> +
> +static hdb_handle_t corosync_stats_create_connection (const char* name,
> +			const pid_t pid, const int fd)
> +{
> +	uint32_t zero_32 = 0;
> +	unsigned int key_incr_dummy;
> +	hdb_handle_t object_handle;
> +
> +	objdb->object_key_increment (object_connection_handle,
> +								 "active", strlen("active"),
> +								 &key_incr_dummy);
> +
> +	objdb->object_create (object_connection_handle,
> +		&object_handle,
> +		name,
> +		strlen (name));
> +
> +	objdb->object_key_create_ext (object_handle,
> +		"service_id",
> +		&zero_32,
> +		sizeof (zero_32),
> +		OBJDB_VALUETYPE_UINT32);
> +
> +	objdb->object_key_create_ext (object_handle,
> +		"client_pid",
> +		&pid,
> +		sizeof (int),
> +		OBJDB_VALUETYPE_INT32);
> +
> +	objdb->object_key_create_ext (object_handle,
> +		"responses",
> +		&zero_32,
> +		sizeof (int),
> +		OBJDB_VALUETYPE_UINT32);
> +
> +	objdb->object_key_create_ext (object_handle,
> +		"dispatched",
> +		&zero_32,
> +		sizeof (int),
> +		OBJDB_VALUETYPE_UINT32);
> +
> +	objdb->object_key_create_ext (object_handle,
> +		"requests",
> +		&zero_32,
> +		sizeof (int),
> +		OBJDB_VALUETYPE_INT32);
> +
> +	objdb->object_key_create_ext (object_handle,
> +		"sem_retry_count",
> +		&zero_32,
> +		sizeof (int),
> +		OBJDB_VALUETYPE_UINT32);
> +
> +	objdb->object_key_create_ext (object_handle,
> +		"send_retry_count",
> +		&zero_32,
> +		sizeof (int),
> +		OBJDB_VALUETYPE_UINT32);
> +
> +	objdb->object_key_create_ext (object_handle,
> +		"recv_retry_count",
> +		&zero_32,
> +		sizeof (int),
> +		OBJDB_VALUETYPE_UINT32);
> +
> +	return object_handle;
> +}
> +
> +static void corosync_stats_destroy_connection (hdb_handle_t handle)
> +{
> +	unsigned int key_incr_dummy;
> +
> +	objdb->object_destroy (handle);
> +
> +	objdb->object_key_increment (object_connection_handle,
> +								 "closed", strlen("closed"),
> +								 &key_incr_dummy);
> +	objdb->object_key_decrement (object_connection_handle,
> +								 "active", strlen("active"),
> +								 &key_incr_dummy);
> +}
> +
> +static void corosync_stats_update_value (hdb_handle_t handle,
> +			const char* name, const void* value, const size_t value_len)
> +{
> +	objdb->object_key_replace (handle,
> +							   name, strlen(name),
> +							   value, value_len);
> +}
> +
> +static void corosync_stats_increment_value (hdb_handle_t handle,
> +			const char* name)
> +{
> +	unsigned int key_incr_dummy;
> +	objdb->object_key_increment (handle,
> +								 name, strlen(name),
> +								 &key_incr_dummy);
> +}
> +
>  static struct coroipcs_init_state ipc_init_state = {
>  	.socket_name			= COROSYNC_SOCKET_NAME,
>  	.sched_policy			= SCHED_OTHER,
> @@ -689,7 +791,11 @@ static struct coroipcs_init_state ipc_init_state = {
>  	.poll_dispatch_modify		= corosync_poll_dispatch_modify,
>  	.init_fn_get			= corosync_init_fn_get,
>  	.exit_fn_get			= corosync_exit_fn_get,
> -	.handler_fn_get			= corosync_handler_fn_get
> +	.handler_fn_get			= corosync_handler_fn_get,
> +	.stats_create_connection = corosync_stats_create_connection,
> +	.stats_destroy_connection = corosync_stats_destroy_connection,
> +	.stats_update_value = corosync_stats_update_value,
> +	.stats_increment_value = corosync_stats_increment_value
Align = via tabs.

>  };

I like how this is done in principal.  However, we can't modify
ipc_init_state (breaks abi compatibility).  I recommend a new struct

struct coroipcs_init_stat_state ipc_stat_init_state

and a new API that sets these stat callback functions.

>  
>  static void corosync_setscheduler (void)
> @@ -734,6 +840,38 @@ static void corosync_setscheduler (void)
>  #endif
>  }
>  
> +static void corosync_stats_init (void)
> +{
> +	hdb_handle_t object_find_handle;
> +	hdb_handle_t object_runtime_handle;
> +	hdb_handle_t object_totem_handle;
> +	uint32_t zero_32 = 0;
> +
> +	objdb->object_find_create (
> +		OBJECT_PARENT_HANDLE,
> +		"runtime",
> +		strlen ("runtime"),
> +		&object_find_handle);
> +
> +	if (objdb->object_find_next (
> +			object_find_handle,
> +			&object_runtime_handle) != 0) {
> +		return;
> +	}
> +	/* Connection objects */
> +	objdb->object_create (object_runtime_handle,
> +						  &object_connection_handle,
> +						  "connections",
> +						  strlen ("connections"));
> +	objdb->object_key_create_ext (object_connection_handle,
> +								  "active", &zero_32, sizeof (zero_32),
> +								  OBJDB_VALUETYPE_UINT32);
> +	objdb->object_key_create_ext (object_connection_handle,
> +								  "closed", &zero_32, sizeof (zero_32),
> +								  OBJDB_VALUETYPE_UINT32);
> +
> +}
> +
>  void main_service_ready (void)
>  {
>  	int res;
> @@ -746,6 +884,7 @@ void main_service_ready (void)
>  		corosync_exit_error (AIS_DONE_INIT_SERVICES);
>  	}
>  	evil_init (api);
> +	corosync_stats_init ();

Where is this defined?  I don't see it in this patch..

>  }
>  
>  int main (int argc, char **argv)
> @@ -765,6 +904,7 @@ int main (int argc, char **argv)
>  	int background, setprio;
>  	struct stat stat_out;
>  	char corosync_lib_dir[PATH_MAX];
> +	hdb_handle_t object_runtime_handle;
>  
>  #if defined(HAVE_PTHREAD_SPIN_LOCK)
>  	pthread_spin_init (&serialize_spin, 0);
> @@ -977,6 +1117,11 @@ int main (int argc, char **argv)
>  		corosync_exit_error (AIS_DONE_MAINCONFIGREAD);
>  	}
>  
> +	/* create the main runtime object */
> +	objdb->object_create (OBJECT_PARENT_HANDLE,
> +			&object_runtime_handle,
> +			"runtime", strlen ("runtime"));
> +
>  	/*
>  	 * Now we are fully initialized.
>  	 */
> diff --git a/include/corosync/coroipcs.h b/include/corosync/coroipcs.h
> index 946b66d..5e8728a 100644
> --- a/include/corosync/coroipcs.h
> +++ b/include/corosync/coroipcs.h
> @@ -72,10 +72,17 @@ struct coroipcs_init_state {
>  	coroipcs_init_fn_lvalue (*init_fn_get)(unsigned int service);
>  	coroipcs_exit_fn_lvalue (*exit_fn_get)(unsigned int service);
>  	coroipcs_handler_fn_lvalue (*handler_fn_get)(unsigned int service, unsigned int id);
> +
> +	hdb_handle_t (*stats_create_connection) (const char* name,
> +			const pid_t pid, const int fd);
> +	void (*stats_destroy_connection) (hdb_handle_t handle);
> +	void (*stats_update_value) (hdb_handle_t handle,
> +			const char* name, const void* value, const size_t value_len);
> +	void (*stats_increment_value) (hdb_handle_t handle, const char* name);
>  };
as described before should be in separate function

>  
>  extern void coroipcs_ipc_init (
> -	struct coroipcs_init_state *init_state);
> +        struct coroipcs_init_state *init_state);

space added instead of tab

prob need a new function such as

extern void coroipcs_ipc_stats_init (
	struct coroipcs_init_stat_steate *init_stats_state);

>  
>  extern void *coroipcs_private_data_get (void *conn);
>  



More information about the Openais mailing list