[Openais] [PATCH 1/3] Stats-2 Add connection related statistics to the object db.
Steven Dake
sdake at redhat.com
Fri Oct 9 10:39:32 PDT 2009
comments inline. Great work, although needs a bit of rework for api
compat reasons.
On Thu, 2009-09-24 at 14:00 +1200, angus salkeld wrote:
> Hi
>
> This is a re-work of the connection related stats, to remove the
> dependence on objdb.
>
> -Angus
>
> Signed-off-by: Angus Salkeld <angus.salkeld at alliedtelesis.co.nz>
> ---
> exec/coroipcs.c | 110 ++++++++++++++++++++++++++++++++
> exec/main.c | 147 ++++++++++++++++++++++++++++++++++++++++++-
> include/corosync/coroipcs.h | 9 +++-
> 3 files changed, 264 insertions(+), 2 deletions(-)
>
> diff --git a/exec/coroipcs.c b/exec/coroipcs.c
> index ab7be0b..b01696a 100644
> --- a/exec/coroipcs.c
> +++ b/exec/coroipcs.c
> @@ -72,6 +72,7 @@
> #include <corosync/list.h>
>
> #include <corosync/coroipc_types.h>
> +#include <corosync/hdb.h>
> #include <corosync/coroipcs.h>
> #include <corosync/coroipc_ipc.h>
>
> @@ -130,11 +131,13 @@ enum conn_state {
> struct conn_info {
> int fd;
> pthread_t thread;
> + pid_t client_pid;
> pthread_attr_t thread_attr;
> unsigned int service;
> enum conn_state state;
> int notify_flow_control_enabled;
> int refcount;
> + hdb_handle_t stats_handle;
> #if _POSIX_THREAD_PROCESS_SHARED < 1
> key_t semkey;
> int semid;
> @@ -180,6 +183,8 @@ static void sem_post_exit_thread (struct conn_info *conn_info)
> retry_semop:
> res = sem_post (&conn_info->control_buffer->sem0);
> if (res == -1 && errno == EINTR) {
> + api->stats_increment_value (conn_info->stats_handle,
> + "sem_retry_count");
> goto retry_semop;
> }
> #else
> @@ -190,6 +195,8 @@ retry_semop:
> retry_semop:
> res = semop (conn_info->semid, &sop, 1);
> if ((res == -1) && (errno == EINTR || errno == EAGAIN)) {
> + api->stats_increment_value (conn_info->stats_handle,
> + "sem_retry_count");
> goto retry_semop;
> }
> #endif
> @@ -437,6 +444,9 @@ static inline int conn_info_destroy (struct conn_info *conn_info)
> * Retry library exit function if busy
> */
> if (conn_info->state == CONN_STATE_THREAD_DESTROYED) {
> +
> + api->stats_destroy_connection (conn_info->stats_handle);
> +
> res = api->exit_fn_get (conn_info->service) (conn_info);
> if (res == -1) {
> api->serialize_unlock ();
> @@ -588,6 +598,8 @@ retry_semwait:
> pthread_exit (0);
> }
> if ((res == -1) && (errno == EINTR)) {
> + api->stats_increment_value (conn_info->stats_handle,
> + "sem_retry_count");
> goto retry_semwait;
> }
> #else
> @@ -602,6 +614,8 @@ retry_semop:
> }
> res = semop (conn_info->semid, &sop, 1);
> if ((res == -1) && (errno == EINTR || errno == EAGAIN)) {
> + api->stats_increment_value (conn_info->stats_handle,
> + "sem_retry_count");
> goto retry_semop;
> } else
> if ((res == -1) && (errno == EINVAL || errno == EIDRM)) {
> @@ -639,12 +653,17 @@ retry_semop:
> } else
> if (send_ok) {
> api->serialize_lock();
> +
> + api->stats_increment_value (conn_info->stats_handle,
> + "requests");
> api->handler_fn_get (conn_info->service, header->id) (conn_info, header);
> api->serialize_unlock();
> } else {
> /*
> * Overload, tell library to retry
> */
> + api->stats_increment_value (conn_info->stats_handle,
> + "sem_retry_count");
> coroipc_response_header.size = sizeof (coroipc_response_header_t);
> coroipc_response_header.id = 0;
> coroipc_response_header.error = CS_ERR_TRY_AGAIN;
> @@ -672,9 +691,13 @@ req_setup_send (
> retry_send:
> res = send (conn_info->fd, &res_setup, sizeof (mar_res_setup_t), MSG_WAITALL);
> if (res == -1 && errno == EINTR) {
> + api->stats_increment_value (conn_info->stats_handle,
> + "send_retry_count");
> goto retry_send;
> } else
> if (res == -1 && errno == EAGAIN) {
> + api->stats_increment_value (conn_info->stats_handle,
> + "send_retry_count");
> goto retry_send;
> }
> return (0);
> @@ -719,6 +742,8 @@ req_setup_recv (
> retry_recv:
> res = recvmsg (conn_info->fd, &msg_recv, MSG_NOSIGNAL);
> if (res == -1 && errno == EINTR) {
> + api->stats_increment_value (conn_info->stats_handle,
> + "recv_retry_count");
> goto retry_recv;
> } else
> if (res == -1 && errno != EAGAIN) {
> @@ -785,6 +810,7 @@ retry_recv:
> assert (cmsg);
> cred = (struct ucred *)CMSG_DATA (cmsg);
> if (cred) {
> + conn_info->client_pid = cred->pid;
> if (api->security_valid (cred->uid, cred->gid)) {
> authenticated = 1;
> }
> @@ -838,6 +864,7 @@ static int conn_info_create (int fd)
> memset (conn_info, 0, sizeof (struct conn_info));
>
> conn_info->fd = fd;
> + conn_info->client_pid = 0;
> conn_info->service = SOCKET_SERVICE_INIT;
> conn_info->state = CONN_STATE_THREAD_INACTIVE;
> list_init (&conn_info->outq_head);
> @@ -1000,12 +1027,16 @@ int coroipcs_response_send (void *conn, const void *msg, size_t mlen)
> retry_semop:
> res = semop (conn_info->semid, &sop, 1);
> if ((res == -1) && (errno == EINTR || errno == EAGAIN)) {
> + api->stats_increment_value (conn_info->stats_handle,
> + "sem_retry_count");
> goto retry_semop;
> } else
> if ((res == -1) && (errno == EINVAL || errno == EIDRM)) {
> return (0);
> }
> #endif
> + api->stats_increment_value (conn_info->stats_handle,
> + "responses");
> return (0);
> }
>
> @@ -1038,12 +1069,16 @@ int coroipcs_response_iov_send (void *conn, const struct iovec *iov, unsigned in
> retry_semop:
> res = semop (conn_info->semid, &sop, 1);
> if ((res == -1) && (errno == EINTR || errno == EAGAIN)) {
> + api->stats_increment_value (conn_info->stats_handle,
> + "sem_retry_count");
> goto retry_semop;
> } else
> if ((res == -1) && (errno == EINVAL || errno == EIDRM)) {
> return (0);
> }
> #endif
> + api->stats_increment_value (conn_info->stats_handle,
> + "responses");
> return (0);
> }
>
> @@ -1115,12 +1150,16 @@ static void msg_send (void *conn, const struct iovec *iov, unsigned int iov_len,
> retry_semop:
> res = semop (conn_info->semid, &sop, 1);
> if ((res == -1) && (errno == EINTR || errno == EAGAIN)) {
> + api->stats_increment_value (conn_info->stats_handle,
> + "sem_retry_count");
> goto retry_semop;
> } else
> if ((res == -1) && (errno == EINVAL || errno == EIDRM)) {
> return;
> }
> #endif
> + api->stats_increment_value (conn_info->stats_handle,
> + "dispatched");
> }
>
> static void outq_flush (struct conn_info *conn_info) {
> @@ -1173,9 +1212,13 @@ retry_recv:
> sizeof (mar_req_priv_change),
> MSG_NOSIGNAL);
> if (res == -1 && errno == EINTR) {
> + api->stats_increment_value (conn_info->stats_handle,
> + "recv_retry_count");
> goto retry_recv;
> }
> if (res == -1 && errno == EAGAIN) {
> + api->stats_increment_value (conn_info->stats_handle,
> + "recv_retry_count");
> goto retry_recv;
> }
> if (res == -1 && errno != EAGAIN) {
> @@ -1346,6 +1389,70 @@ retry_accept:
> return (0);
> }
>
> +static char * pid_to_name (pid_t pid, char* out_name, size_t name_len)
> +{
> + char *name;
> + char *rest;
> + FILE *fp;
> + char fname[32];
> + char buf[256];
> +
> + snprintf (fname, 32, "/proc/%d/stat", pid);
> + fp = fopen (fname, "r");
> + if (!fp) {
> + return NULL;
> + }
> +
> + if (fgets (buf, sizeof (buf), fp) == NULL) {
> + fclose (fp);
> + return NULL;
> + }
> + fclose (fp);
> +
> + name = strrchr (buf, '(');
> + if (!name) {
> + return NULL;
> + }
> +
> + /* move past the bracket */
> + name++;
> +
> + rest = strrchr (buf, ')');
> +
> + if (rest == NULL || rest[1] != ' ') {
> + return NULL;
> + }
> +
> + *rest = '\0';
> + /* move past the NULL and space */
> + rest += 2;
> +
> + /* copy the name */
> + strncpy (out_name, name, name_len);
> + out_name[name_len] = '\0';
> + return out_name;
> +}
> +
yikes. I looked for a posix api to produce this information but
couldn't find one... How does this work on OS without proc?
> +static void coroipcs_init_conn_stats (
> + struct conn_info * conn)
> +{
> + char conn_name[42];
> + char proc_name[32];
> + int res;
> +
> + if (conn->client_pid > 0) {
> + if (pid_to_name (conn->client_pid, proc_name, sizeof(proc_name)))
> + snprintf (conn_name, sizeof(conn_name), "%s:%d:%d", proc_name, conn->client_pid, conn->fd);
> + else
> + snprintf (conn_name, sizeof(conn_name), "%d:%d", conn->client_pid, conn->fd);
> + } else
> + snprintf (conn_name, sizeof(conn_name), "%d", conn->fd);
> +
> + conn->stats_handle = api->stats_create_connection (conn_name, conn->client_pid, conn->fd);
> + api->stats_update_value (conn->stats_handle, "service_id",
> + &conn->service, sizeof(conn->service));
> +}
> +
> int coroipcs_handler_dispatch (
> int fd,
> int revent,
> @@ -1447,6 +1554,9 @@ int coroipcs_handler_dispatch (
>
> api->init_fn_get (conn_info->service) (conn_info);
>
> + /* create stats objects */
> + coroipcs_init_conn_stats (conn_info);
> +
> pthread_attr_init (&conn_info->thread_attr);
> /*
> * IA64 needs more stack space then other arches
> diff --git a/exec/main.c b/exec/main.c
> index a85aa55..82ab44d 100644
> --- a/exec/main.c
> +++ b/exec/main.c
> @@ -129,6 +129,8 @@ static hdb_handle_t corosync_poll_handle;
>
> struct sched_param global_sched_param;
>
> +static hdb_handle_t object_connection_handle;
> +
> hdb_handle_t corosync_poll_handle_get (void)
> {
> return (corosync_poll_handle);
> @@ -461,6 +463,7 @@ static void deliver_fn (
> ((void *)msg);
> }
>
> +
random cr
> ais_service[service]->exec_engine[fn_id].exec_handler_fn
> (msg, nodeid);
>
> @@ -669,6 +672,105 @@ static void corosync_poll_dispatch_modify (
> corosync_poll_handler_dispatch);
> }
>
> +
> +static hdb_handle_t corosync_stats_create_connection (const char* name,
> + const pid_t pid, const int fd)
> +{
> + uint32_t zero_32 = 0;
> + unsigned int key_incr_dummy;
> + hdb_handle_t object_handle;
> +
> + objdb->object_key_increment (object_connection_handle,
> + "active", strlen("active"),
> + &key_incr_dummy);
> +
> + objdb->object_create (object_connection_handle,
> + &object_handle,
> + name,
> + strlen (name));
> +
> + objdb->object_key_create_ext (object_handle,
> + "service_id",
> + &zero_32,
> + sizeof (zero_32),
> + OBJDB_VALUETYPE_UINT32);
> +
> + objdb->object_key_create_ext (object_handle,
> + "client_pid",
> + &pid,
> + sizeof (int),
> + OBJDB_VALUETYPE_INT32);
> +
> + objdb->object_key_create_ext (object_handle,
> + "responses",
> + &zero_32,
> + sizeof (int),
> + OBJDB_VALUETYPE_UINT32);
> +
> + objdb->object_key_create_ext (object_handle,
> + "dispatched",
> + &zero_32,
> + sizeof (int),
> + OBJDB_VALUETYPE_UINT32);
> +
> + objdb->object_key_create_ext (object_handle,
> + "requests",
> + &zero_32,
> + sizeof (int),
> + OBJDB_VALUETYPE_INT32);
> +
> + objdb->object_key_create_ext (object_handle,
> + "sem_retry_count",
> + &zero_32,
> + sizeof (int),
> + OBJDB_VALUETYPE_UINT32);
> +
> + objdb->object_key_create_ext (object_handle,
> + "send_retry_count",
> + &zero_32,
> + sizeof (int),
> + OBJDB_VALUETYPE_UINT32);
> +
> + objdb->object_key_create_ext (object_handle,
> + "recv_retry_count",
> + &zero_32,
> + sizeof (int),
> + OBJDB_VALUETYPE_UINT32);
> +
> + return object_handle;
> +}
> +
> +static void corosync_stats_destroy_connection (hdb_handle_t handle)
> +{
> + unsigned int key_incr_dummy;
> +
> + objdb->object_destroy (handle);
> +
> + objdb->object_key_increment (object_connection_handle,
> + "closed", strlen("closed"),
> + &key_incr_dummy);
> + objdb->object_key_decrement (object_connection_handle,
> + "active", strlen("active"),
> + &key_incr_dummy);
> +}
> +
> +static void corosync_stats_update_value (hdb_handle_t handle,
> + const char* name, const void* value, const size_t value_len)
> +{
> + objdb->object_key_replace (handle,
> + name, strlen(name),
> + value, value_len);
> +}
> +
> +static void corosync_stats_increment_value (hdb_handle_t handle,
> + const char* name)
> +{
> + unsigned int key_incr_dummy;
> + objdb->object_key_increment (handle,
> + name, strlen(name),
> + &key_incr_dummy);
> +}
> +
> static struct coroipcs_init_state ipc_init_state = {
> .socket_name = COROSYNC_SOCKET_NAME,
> .sched_policy = SCHED_OTHER,
> @@ -689,7 +791,11 @@ static struct coroipcs_init_state ipc_init_state = {
> .poll_dispatch_modify = corosync_poll_dispatch_modify,
> .init_fn_get = corosync_init_fn_get,
> .exit_fn_get = corosync_exit_fn_get,
> - .handler_fn_get = corosync_handler_fn_get
> + .handler_fn_get = corosync_handler_fn_get,
> + .stats_create_connection = corosync_stats_create_connection,
> + .stats_destroy_connection = corosync_stats_destroy_connection,
> + .stats_update_value = corosync_stats_update_value,
> + .stats_increment_value = corosync_stats_increment_value
Align = via tabs.
> };
I like how this is done in principal. However, we can't modify
ipc_init_state (breaks abi compatibility). I recommend a new struct
struct coroipcs_init_stat_state ipc_stat_init_state
and a new API that sets these stat callback functions.
>
> static void corosync_setscheduler (void)
> @@ -734,6 +840,38 @@ static void corosync_setscheduler (void)
> #endif
> }
>
> +static void corosync_stats_init (void)
> +{
> + hdb_handle_t object_find_handle;
> + hdb_handle_t object_runtime_handle;
> + hdb_handle_t object_totem_handle;
> + uint32_t zero_32 = 0;
> +
> + objdb->object_find_create (
> + OBJECT_PARENT_HANDLE,
> + "runtime",
> + strlen ("runtime"),
> + &object_find_handle);
> +
> + if (objdb->object_find_next (
> + object_find_handle,
> + &object_runtime_handle) != 0) {
> + return;
> + }
> + /* Connection objects */
> + objdb->object_create (object_runtime_handle,
> + &object_connection_handle,
> + "connections",
> + strlen ("connections"));
> + objdb->object_key_create_ext (object_connection_handle,
> + "active", &zero_32, sizeof (zero_32),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (object_connection_handle,
> + "closed", &zero_32, sizeof (zero_32),
> + OBJDB_VALUETYPE_UINT32);
> +
> +}
> +
> void main_service_ready (void)
> {
> int res;
> @@ -746,6 +884,7 @@ void main_service_ready (void)
> corosync_exit_error (AIS_DONE_INIT_SERVICES);
> }
> evil_init (api);
> + corosync_stats_init ();
Where is this defined? I don't see it in this patch..
> }
>
> int main (int argc, char **argv)
> @@ -765,6 +904,7 @@ int main (int argc, char **argv)
> int background, setprio;
> struct stat stat_out;
> char corosync_lib_dir[PATH_MAX];
> + hdb_handle_t object_runtime_handle;
>
> #if defined(HAVE_PTHREAD_SPIN_LOCK)
> pthread_spin_init (&serialize_spin, 0);
> @@ -977,6 +1117,11 @@ int main (int argc, char **argv)
> corosync_exit_error (AIS_DONE_MAINCONFIGREAD);
> }
>
> + /* create the main runtime object */
> + objdb->object_create (OBJECT_PARENT_HANDLE,
> + &object_runtime_handle,
> + "runtime", strlen ("runtime"));
> +
> /*
> * Now we are fully initialized.
> */
> diff --git a/include/corosync/coroipcs.h b/include/corosync/coroipcs.h
> index 946b66d..5e8728a 100644
> --- a/include/corosync/coroipcs.h
> +++ b/include/corosync/coroipcs.h
> @@ -72,10 +72,17 @@ struct coroipcs_init_state {
> coroipcs_init_fn_lvalue (*init_fn_get)(unsigned int service);
> coroipcs_exit_fn_lvalue (*exit_fn_get)(unsigned int service);
> coroipcs_handler_fn_lvalue (*handler_fn_get)(unsigned int service, unsigned int id);
> +
> + hdb_handle_t (*stats_create_connection) (const char* name,
> + const pid_t pid, const int fd);
> + void (*stats_destroy_connection) (hdb_handle_t handle);
> + void (*stats_update_value) (hdb_handle_t handle,
> + const char* name, const void* value, const size_t value_len);
> + void (*stats_increment_value) (hdb_handle_t handle, const char* name);
> };
as described before should be in separate function
>
> extern void coroipcs_ipc_init (
> - struct coroipcs_init_state *init_state);
> + struct coroipcs_init_state *init_state);
space added instead of tab
prob need a new function such as
extern void coroipcs_ipc_stats_init (
struct coroipcs_init_stat_steate *init_stats_state);
>
> extern void *coroipcs_private_data_get (void *conn);
>
More information about the Openais
mailing list