[Openais] [PATCH 3/3] Add totem statistics to objdb.
Steven Dake
sdake at redhat.com
Fri Oct 9 10:21:26 PDT 2009
This is an exceptional idea and I really like the concepts. Some of the
implementation needs a little rework. Those comments are inline. Once
the rework is done this patch should be ready for commit.
Also I couldn't find it in the patch, but new functions such as
totem_get_stats should go at the end of the corosync_api_v1 definition,
otherwise existing lcrso binaries that were compiled against coroapi will
call through the wrong function pointers.
Regards
-steve
On Thu, 2009-09-24 at 14:04 +1200, angus salkeld wrote:
> Hi
>
> -Added rx_msg_dropped
> -cleaned up TODO's
> -use calloc instead of malloc to zero out the struct.
>
> -Angus
>
> Signed-off-by: Angus Salkeld <angus.salkeld at alliedtelesis.co.nz>
> ---
> exec/apidef.c | 1 +
> exec/main.c | 312 +++++++++++++++++++++++++++++++++++++
> exec/totemmrp.c | 4 +-
> exec/totemmrp.h | 1 +
> exec/totempg.c | 10 ++
> exec/totemsrp.c | 104 ++++++++++++
> exec/totemsrp.h | 1 +
> include/corosync/engine/coroapi.h | 2 +
> include/corosync/totem/totem.h | 70 ++++++++
> include/corosync/totem/totempg.h | 2 +
> 10 files changed, 506 insertions(+), 1 deletions(-)
>
> diff --git a/exec/apidef.c b/exec/apidef.c
> index 9916d62..6699484 100644
> --- a/exec/apidef.c
> +++ b/exec/apidef.c
> @@ -117,6 +117,7 @@ static struct corosync_api_v1 apidef_corosync_api_v1 = {
> .totem_ip_print = totemip_print,
> .totem_crypto_set = totempg_crypto_set,
> .totem_callback_token_create = totempg_callback_token_create,
> + .totem_get_stats = totempg_get_stats,
> .tpg_init = totempg_groups_initialize,
> .tpg_exit = NULL, /* missing from totempg api */
> .tpg_join = (typedef_tpg_join)totempg_groups_join,
> diff --git a/exec/main.c b/exec/main.c
> index c4ea700..b350e85 100644
> --- a/exec/main.c
> +++ b/exec/main.c
> @@ -129,6 +129,7 @@ static hdb_handle_t corosync_poll_handle;
>
> struct sched_param global_sched_param;
>
> +static poll_timer_handle corosync_stats_timer_handle;
Should use api->timer_add_duration vs poll_timer_handle for the polling
of information.
> static hdb_handle_t object_connection_handle;
>
> hdb_handle_t corosync_poll_handle_get (void)
> @@ -429,6 +430,7 @@ static void deliver_fn (
> int fn_id;
> unsigned int id;
> unsigned int size;
> + unsigned int key_incr_dummy;
I can't tell from patched srcs if this variable is used. Is it?
>
> header = msg;
> if (endian_conversion_required) {
> @@ -857,12 +859,168 @@ static void corosync_setscheduler (void)
> #endif
> }
>
> +
> +static void corosync_totem_stats_updater (void *data)
> +{
> + totempg_stats_t * stats;
> + uint32_t mtt_rx_token;
> + uint32_t total_mtt_rx_token;
> + uint32_t avg_backlog_calc;
> + uint32_t total_backlog_calc;
> + uint32_t avg_token_holdtime;
> + uint32_t total_token_holdtime;
> + int t, prev;
> + int32_t token_count;
> +
> + stats = api->totem_get_stats();
> +
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "orf_token_tx", strlen("orf_token_tx"),
> + &stats->mrp->srp->orf_token_tx,
> + sizeof(stats->mrp->srp->orf_token_tx));
All these should be indented two tabstops rather than 6-7 - it makes the
code hard to read in vi. This comment applies to all object_key operations.
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "orf_token_rx", strlen("orf_token_rx"),
> + &stats->mrp->srp->orf_token_rx,
> + sizeof(stats->mrp->srp->orf_token_rx));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "memb_merge_detect_tx", strlen("memb_merge_detect_tx"),
> + &stats->mrp->srp->memb_merge_detect_tx,
> + sizeof(stats->mrp->srp->memb_merge_detect_tx));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "memb_merge_detect_rx",strlen("memb_merge_detect_rx"),
> + &stats->mrp->srp->memb_merge_detect_rx,
> + sizeof(stats->mrp->srp->memb_merge_detect_rx));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "memb_join_tx",strlen("memb_join_tx"),
> + &stats->mrp->srp->memb_join_tx,
> + sizeof(stats->mrp->srp->memb_join_tx));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "memb_join_rx",strlen("memb_join_rx"),
> + &stats->mrp->srp->memb_join_rx,
> + sizeof(stats->mrp->srp->memb_join_rx));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "mcast_tx",strlen("mcast_tx"),
> + &stats->mrp->srp->mcast_tx,
> + sizeof(stats->mrp->srp->mcast_tx));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "mcast_retx",strlen("mcast_retx"),
> + &stats->mrp->srp->mcast_retx,
> + sizeof(stats->mrp->srp->mcast_retx));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "mcast_rx",strlen("mcast_rx"),
> + &stats->mrp->srp->mcast_rx,
> + sizeof(stats->mrp->srp->mcast_rx));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "memb_commit_token_tx",strlen("memb_commit_token_tx"),
> + &stats->mrp->srp->memb_commit_token_tx,
> + sizeof(stats->mrp->srp->memb_commit_token_tx));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "memb_commit_token_rx",strlen("memb_commit_token_rx"),
> + &stats->mrp->srp->memb_commit_token_rx,
> + sizeof(stats->mrp->srp->memb_commit_token_rx));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "token_hold_cancel_tx",strlen("token_hold_cancel_tx"),
> + &stats->mrp->srp->token_hold_cancel_tx,
> + sizeof(stats->mrp->srp->token_hold_cancel_tx));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "token_hold_cancel_rx",strlen("token_hold_cancel_rx"),
> + &stats->mrp->srp->token_hold_cancel_rx,
> + sizeof(stats->mrp->srp->token_hold_cancel_rx));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "operational_entered",strlen("operational_entered"),
> + &stats->mrp->srp->operational_entered,
> + sizeof(stats->mrp->srp->operational_entered));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "operational_token_lost",strlen("operational_token_lost"),
> + &stats->mrp->srp->operational_token_lost,
> + sizeof(stats->mrp->srp->operational_token_lost));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "gather_entered",strlen("gather_entered"),
> + &stats->mrp->srp->gather_entered,
> + sizeof(stats->mrp->srp->gather_entered));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "gather_token_lost",strlen("gather_token_lost"),
> + &stats->mrp->srp->gather_token_lost,
> + sizeof(stats->mrp->srp->gather_token_lost));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "commit_entered",strlen("commit_entered"),
> + &stats->mrp->srp->commit_entered,
> + sizeof(stats->mrp->srp->commit_entered));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "commit_token_lost",strlen("commit_token_lost"),
> + &stats->mrp->srp->commit_token_lost,
> + sizeof(stats->mrp->srp->commit_token_lost));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "recovery_entered",strlen("recovery_entered"),
> + &stats->mrp->srp->recovery_entered,
> + sizeof(stats->mrp->srp->recovery_entered));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "recovery_token_lost",strlen("recovery_token_lost"),
> + &stats->mrp->srp->recovery_token_lost,
> + sizeof(stats->mrp->srp->recovery_token_lost));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "consensus_timeouts",strlen("consensus_timeouts"),
> + &stats->mrp->srp->consensus_timeouts,
> + sizeof(stats->mrp->srp->consensus_timeouts));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "rx_msg_dropped", strlen("rx_msg_dropped"),
> + &stats->mrp->srp->rx_msg_dropped,
> + sizeof(stats->mrp->srp->rx_msg_dropped));
> +
> + total_mtt_rx_token = 0;
> + total_token_holdtime = 0;
> + total_backlog_calc = 0;
> + token_count = 0;
> + t = stats->mrp->srp->latest_token;
> + while (1) {
> + if (t == 0)
> + prev = TOTEM_TOKEN_STATS_MAX - 1;
> + else
> + prev = t - 1;
> + if (prev == stats->mrp->srp->earliest_token)
> + break;
> + /* if tx == 0, then dropped token (not ours) */
> + if (stats->mrp->srp->token[t].tx != 0 ||
> + (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx) > 0 ) {
> + total_mtt_rx_token += (stats->mrp->srp->token[t].rx - stats->mrp->srp->token[prev].rx);
> + total_token_holdtime += (stats->mrp->srp->token[t].tx - stats->mrp->srp->token[t].rx);
> + total_backlog_calc += stats->mrp->srp->token[t].backlog_calc;
> + token_count++;
> + }
> + t = prev;
> + }
> + mtt_rx_token = (total_mtt_rx_token / token_count);
> + avg_backlog_calc = (total_backlog_calc / token_count);
> + avg_token_holdtime = (total_token_holdtime / token_count);
> +
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "mtt_rx_token",strlen("mtt_rx_token"),
> + &mtt_rx_token,
> + sizeof(mtt_rx_token));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "avg_token_workload",strlen("avg_token_workload"),
> + &avg_token_holdtime,
> + sizeof(avg_backlog_calc));
> + objdb->object_key_replace (stats->mrp->srp->hdr.handle,
> + "avg_backlog_calc",strlen("avg_backlog_calc"),
> + &avg_backlog_calc,
> + sizeof(avg_backlog_calc));
> +
> +
Should use api->timer_add_duration if possible.
> + poll_timer_add (corosync_poll_handle,
> + 1500, NULL,
> + corosync_totem_stats_updater,
> + &corosync_stats_timer_handle);
> +}
> +
> static void corosync_stats_init (void)
> {
> hdb_handle_t object_find_handle;
> hdb_handle_t object_runtime_handle;
> hdb_handle_t object_totem_handle;
> + totempg_stats_t * stats;
> uint32_t zero_32 = 0;
> + uint64_t zero_64 = 0;
>
> objdb->object_find_create (
> OBJECT_PARENT_HANDLE,
> @@ -887,8 +1045,162 @@ static void corosync_stats_init (void)
> "closed", &zero_32, sizeof (zero_32),
> OBJDB_VALUETYPE_UINT32);
>
> + /* Totem objects */
> + stats = api->totem_get_stats();
> +
> + objdb->object_create (object_runtime_handle,
> + &object_totem_handle,
> + "totem", strlen ("totem"));
> + objdb->object_create (object_totem_handle,
> + &stats->hdr.handle,
> + "pg", strlen ("pg"));
> + objdb->object_create (stats->hdr.handle,
> + &stats->mrp->hdr.handle,
> + "mrp", strlen ("mrp"));
> + objdb->object_create (stats->mrp->hdr.handle,
> + &stats->mrp->srp->hdr.handle,
> + "srp", strlen ("srp"));
> +
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "orf_token_tx",
> + &stats->mrp->srp->orf_token_tx,
> + sizeof(stats->mrp->srp->orf_token_tx),
> + OBJDB_VALUETYPE_UINT32);
Can we call this object_key_create_ext function object_key_create_typed
instead?
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "orf_token_rx",
> + &stats->mrp->srp->orf_token_rx,
> + sizeof(stats->mrp->srp->orf_token_rx),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "memb_merge_detect_tx",
> + &stats->mrp->srp->memb_merge_detect_tx,
> + sizeof(stats->mrp->srp->memb_merge_detect_tx),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "memb_merge_detect_rx",
> + &stats->mrp->srp->memb_merge_detect_rx,
> + sizeof(stats->mrp->srp->memb_merge_detect_rx),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "memb_join_tx",
> + &stats->mrp->srp->memb_join_tx,
> + sizeof(stats->mrp->srp->memb_join_tx),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "memb_join_rx",
> + &stats->mrp->srp->memb_join_rx,
> + sizeof(stats->mrp->srp->memb_join_rx),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "mcast_tx",
> + &stats->mrp->srp->mcast_tx,
> + sizeof(stats->mrp->srp->mcast_tx),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "mcast_retx",
> + &stats->mrp->srp->mcast_retx,
> + sizeof(stats->mrp->srp->mcast_retx),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "mcast_rx",
> + &stats->mrp->srp->mcast_rx,
> + sizeof(stats->mrp->srp->mcast_rx),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "memb_commit_token_tx",
> + &stats->mrp->srp->memb_commit_token_tx,
> + sizeof(stats->mrp->srp->memb_commit_token_tx),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "memb_commit_token_rx",
> + &stats->mrp->srp->memb_commit_token_rx,
> + sizeof(stats->mrp->srp->memb_commit_token_rx),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "token_hold_cancel_tx",
> + &stats->mrp->srp->token_hold_cancel_tx,
> + sizeof(stats->mrp->srp->token_hold_cancel_tx),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "token_hold_cancel_rx",
> + &stats->mrp->srp->token_hold_cancel_rx,
> + sizeof(stats->mrp->srp->token_hold_cancel_rx),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "operational_entered",
> + &stats->mrp->srp->operational_entered,
> + sizeof(stats->mrp->srp->operational_entered),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "operational_token_lost",
> + &stats->mrp->srp->operational_token_lost,
> + sizeof(stats->mrp->srp->operational_token_lost),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "gather_entered",
> + &stats->mrp->srp->gather_entered,
> + sizeof(stats->mrp->srp->gather_entered),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "gather_token_lost",
> + &stats->mrp->srp->gather_token_lost,
> + sizeof(stats->mrp->srp->gather_token_lost),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "commit_entered",
> + &stats->mrp->srp->commit_entered,
> + sizeof(stats->mrp->srp->commit_entered),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "commit_token_lost",
> + &stats->mrp->srp->commit_token_lost,
> + sizeof(stats->mrp->srp->commit_token_lost),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "recovery_entered",
> + &stats->mrp->srp->recovery_entered,
> + sizeof(stats->mrp->srp->recovery_entered),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "recovery_token_lost",
> + &stats->mrp->srp->recovery_token_lost,
> + sizeof(stats->mrp->srp->recovery_token_lost),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "consensus_timeouts",
> + &stats->mrp->srp->consensus_timeouts,
> + sizeof(stats->mrp->srp->consensus_timeouts),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "rx_msg_dropped",
> + &stats->mrp->srp->rx_msg_dropped,
> + sizeof(stats->mrp->srp->rx_msg_dropped),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "mtt_rx_token",
> + &zero_32,
> + sizeof(zero_32),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "avg_token_workload",
> + &zero_32,
> + sizeof(zero_32),
> + OBJDB_VALUETYPE_UINT32);
> + objdb->object_key_create_ext (stats->mrp->srp->hdr.handle,
> + "avg_backlog_calc",
> + &zero_64,
> + sizeof(zero_64),
> + OBJDB_VALUETYPE_UINT32);
> +
> + /* start stats timer */
> + poll_timer_add (corosync_poll_handle,
> + 1500, NULL,
> + corosync_totem_stats_updater,
> + &corosync_stats_timer_handle);
> +
> }
>
> +
> void main_service_ready (void)
> {
> int res;
> diff --git a/exec/totemmrp.c b/exec/totemmrp.c
> index d014300..92db794 100644
> --- a/exec/totemmrp.c
> +++ b/exec/totemmrp.c
> @@ -57,7 +57,6 @@
>
> #include <corosync/totem/totem.h>
> #include <corosync/totem/coropoll.h>
> -#include <corosync/hdb.h>
>
> #include "totemmrp.h"
> #include "totemsrp.h"
> @@ -119,6 +118,7 @@ void totemmrp_confchg_fn (
> int totemmrp_initialize (
> hdb_handle_t poll_handle,
> struct totem_config *totem_config,
> + totemmrp_stats_t *stats,
>
> void (*deliver_fn) (
> unsigned int nodeid,
> @@ -136,10 +136,12 @@ int totemmrp_initialize (
> pg_deliver_fn = deliver_fn;
> pg_confchg_fn = confchg_fn;
>
> + stats->srp = calloc (sizeof(totemsrp_stats_t), 1);
> result = totemsrp_initialize (
> poll_handle,
> &totemsrp_context,
> totem_config,
> + stats->srp,
> totemmrp_deliver_fn,
> totemmrp_confchg_fn);
>
> diff --git a/exec/totemmrp.h b/exec/totemmrp.h
> index aa19c1b..688c625 100644
> --- a/exec/totemmrp.h
> +++ b/exec/totemmrp.h
> @@ -58,6 +58,7 @@ extern void totemmrp_log_printf_init (
> extern int totemmrp_initialize (
> hdb_handle_t poll_handle,
> struct totem_config *totem_config,
> + totemmrp_stats_t *stats,
>
> void (*deliver_fn) (
> unsigned int nodeid,
> diff --git a/exec/totempg.c b/exec/totempg.c
> index 77f532b..dbec853 100644
> --- a/exec/totempg.c
> +++ b/exec/totempg.c
> @@ -175,6 +175,8 @@ static void (*totempg_log_printf) (
>
> struct totem_config *totempg_totem_config;
>
> +static totempg_stats_t totempg_stats;
> +
> enum throw_away_mode {
> THROW_AWAY_INACTIVE,
> THROW_AWAY_ACTIVE
> @@ -723,9 +725,12 @@ int totempg_initialize (
>
> totemsrp_net_mtu_adjust (totem_config);
>
> + totempg_stats.mrp = malloc (sizeof(totemmrp_stats_t));
> +
> res = totemmrp_initialize (
> poll_handle,
> totem_config,
> + totempg_stats.mrp,
> totempg_deliver_fn,
> totempg_confchg_fn);
>
> @@ -1304,6 +1309,11 @@ int totempg_ifaces_get (
> return (res);
> }
>
> +totempg_stats_t* totempg_get_stats (void)
> +{
> + return &totempg_stats;
> +}
> +
> int totempg_crypto_set (
> unsigned int type)
> {
> diff --git a/exec/totemsrp.c b/exec/totemsrp.c
> index a7c1eac..6b1cfde 100644
> --- a/exec/totemsrp.c
> +++ b/exec/totemsrp.c
> @@ -501,6 +501,7 @@ struct totemsrp_instance {
>
> struct memb_commit_token *commit_token;
>
> + totemsrp_stats_t *stats;
> char commit_token_storage[9000];
> };
>
> @@ -707,6 +708,46 @@ static int pause_flush (struct totemsrp_instance *instance)
> return (res);
> }
>
> +static int token_event_stats_collector (enum totem_callback_token_type type, const void *void_instance)
> +{
> + struct totemsrp_instance *instance = (struct totemsrp_instance *)void_instance;
> + struct timeval tv;
> + uint32_t time_now;
> +
gettimeofday usage in corosync is deprecated for calculating time
differences. Take a look at the function timerlist_nano_current_get in
tlist.h (included in totemsrp.c).
> + if (gettimeofday(&tv, 0)) {
> + return -1;
> + }
> +
> + time_now = tv.tv_sec * 1000;
> + time_now += tv.tv_usec / 1000; /* milli seconds */
> +
> + if (type == TOTEM_CALLBACK_TOKEN_RECEIVED) {
> + /* incr latest token the index */
> + if (instance->stats->latest_token == (TOTEM_TOKEN_STATS_MAX - 1))
> + instance->stats->latest_token = 0;
> + else
> + instance->stats->latest_token++;
> +
> + if (instance->stats->earliest_token == instance->stats->latest_token) {
> + /* we have filled up the array, start overwriting */
> + if (instance->stats->earliest_token == (TOTEM_TOKEN_STATS_MAX - 1))
> + instance->stats->earliest_token = 0;
> + else
> + instance->stats->earliest_token++;
> +
> + instance->stats->token[instance->stats->earliest_token].rx = 0;
> + instance->stats->token[instance->stats->earliest_token].tx = 0;
> + instance->stats->token[instance->stats->earliest_token].backlog_calc = 0;
> + }
> +
> + instance->stats->token[instance->stats->latest_token].rx = time_now;
> + instance->stats->token[instance->stats->latest_token].tx = 0; /* in case we drop the token */
> + } else {
> + instance->stats->token[instance->stats->latest_token].tx = time_now;
> + }
> + return 0;
> +}
> +
> /*
> * Exported interfaces
> */
> @@ -714,6 +755,7 @@ int totemsrp_initialize (
> hdb_handle_t poll_handle,
> void **srp_context,
> struct totem_config *totem_config,
> + totemsrp_stats_t *stats,
>
> void (*deliver_fn) (
> unsigned int nodeid,
> @@ -753,6 +795,10 @@ int totemsrp_initialize (
>
> totemsrp_instance_initialize (instance);
>
> + instance->stats = stats;
> + instance->stats->latest_token = 0;
> + instance->stats->earliest_token = 0;
> +
> instance->totem_config = totem_config;
>
> /*
> @@ -881,6 +927,18 @@ int totemsrp_initialize (
> MESSAGE_QUEUE_MAX,
> sizeof (struct message_item));
>
> + totemsrp_callback_token_create (instance,
> + &instance->stats->token_recv_event_handle,
> + TOTEM_CALLBACK_TOKEN_RECEIVED,
> + 0,
> + token_event_stats_collector,
> + instance);
> + totemsrp_callback_token_create (instance,
> + &instance->stats->token_sent_event_handle,
> + TOTEM_CALLBACK_TOKEN_SENT,
> + 0,
> + token_event_stats_collector,
> +
This logic is hard to follow and introduces extra locking and
performance penalties. I prefer if you call token_event_stats_collector
at start and end of message_handler_orf_token.
> instance);
> *srp_context = instance;
> return (0);
>
> @@ -1432,6 +1490,7 @@ static void memb_state_consensus_timeout_expired (
> struct srp_addr no_consensus_list[PROCESSOR_COUNT_MAX];
> int no_consensus_list_entries;
>
> + instance->stats->consensus_timeouts++;
> if (memb_consensus_agreed (instance)) {
> memb_consensus_reset (instance);
>
> @@ -1479,6 +1538,7 @@ static void timer_function_orf_token_timeout (void *data)
> "A processor failed, forming new configuration.\n");
> totemrrp_iface_check (instance->totemrrp_context);
> memb_state_gather_enter (instance, 2);
> + instance->stats->operational_token_lost++;
> break;
>
> case MEMB_STATE_GATHER:
> @@ -1486,12 +1546,14 @@ static void timer_function_orf_token_timeout (void *data)
> "The consensus timeout expired.\n");
> memb_state_consensus_timeout_expired (instance);
> memb_state_gather_enter (instance, 3);
> + instance->stats->gather_token_lost++;
> break;
>
> case MEMB_STATE_COMMIT:
> log_printf (instance->totemsrp_log_level_debug,
> "The token was lost in the COMMIT state.\n");
> memb_state_gather_enter (instance, 4);
> + instance->stats->commit_token_lost++;
> break;
>
> case MEMB_STATE_RECOVERY:
> @@ -1499,6 +1561,7 @@ static void timer_function_orf_token_timeout (void *data)
> "The token was lost in the RECOVERY state.\n");
> ring_state_restore (instance);
> memb_state_gather_enter (instance, 5);
> + instance->stats->recovery_token_lost++;
> break;
> }
> }
> @@ -1730,6 +1793,7 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance)
> "A processor joined or left the membership and a new membership was formed.\n");
> instance->memb_state = MEMB_STATE_OPERATIONAL;
>
> + instance->stats->operational_entered++;
> instance->my_received_flg = 1;
>
> reset_pause_timeout (instance);
> @@ -1786,6 +1850,7 @@ static void memb_state_gather_enter (
> "entering GATHER state from %d.\n", gather_from);
>
> instance->memb_state = MEMB_STATE_GATHER;
> + instance->stats->gather_entered++;
>
> return;
> }
> @@ -1831,6 +1896,7 @@ static void memb_state_commit_enter (
> reset_token_retransmit_timeout (instance); // REVIEWED
> reset_token_timeout (instance); // REVIEWED
>
> + instance->stats->commit_entered++;
>
> /*
> * reset all flow control variables since we are starting a new ring
> @@ -2022,6 +2088,7 @@ originated:
> reset_token_retransmit_timeout (instance); // REVIEWED
>
> instance->memb_state = MEMB_STATE_RECOVERY;
> + instance->stats->recovery_entered++;
> return;
> }
>
> @@ -2083,6 +2150,7 @@ int totemsrp_mcast (
> message_item.msg_len = addr_idx;
>
> log_printf (instance->totemsrp_log_level_debug, "mcasted message added to pending queue\n");
> + instance->stats->mcast_tx++;
> cs_queue_item_add (&instance->new_message_queue, &message_item);
>
> return (0);
> @@ -2398,6 +2466,7 @@ static int orf_token_rtr (
> memmove (&rtr_list[i], &rtr_list[i + 1],
> sizeof (struct rtr_item) * (orf_token->rtr_list_entries));
>
> + instance->stats->mcast_retx++;
> instance->fcc_remcast_current++;
> } else {
> i += 1;
> @@ -2567,6 +2636,8 @@ static int token_hold_cancel_send (struct totemsrp_instance *instance)
> sizeof (struct memb_ring_id));
> assert (token_hold_cancel.header.nodeid);
>
> + instance->stats->token_hold_cancel_tx++;
> +
> totemrrp_mcast_flush_send (instance->totemrrp_context, &token_hold_cancel,
> sizeof (struct token_hold_cancel));
>
> @@ -2587,6 +2658,7 @@ static int orf_token_send_initial (struct totemsrp_instance *instance)
> orf_token.token_seq = SEQNO_START_TOKEN;
> orf_token.retrans_flg = 1;
> instance->my_set_retrans_flg = 1;
> + instance->stats->orf_token_tx++;
>
> if (cs_queue_is_empty (&instance->retrans_message_queue) == 1) {
> orf_token.retrans_flg = 0;
> @@ -2748,6 +2820,8 @@ static int memb_state_commit_token_send (
> memcpy (instance->orf_token_retransmit, instance->commit_token, commit_token_size);
> instance->orf_token_retransmit_size = commit_token_size;
>
> + instance->stats->memb_commit_token_tx++;
> +
> totemrrp_token_send (instance->totemrrp_context,
> instance->commit_token,
> commit_token_size);
> @@ -2882,6 +2956,8 @@ static void memb_join_message_send (struct totemsrp_instance *instance)
> usleep (random() % (instance->totem_config->send_join_timeout * 1000));
> }
>
> + instance->stats->memb_join_tx++;
> +
> totemrrp_mcast_flush_send (
> instance->totemrrp_context,
> memb_join,
> @@ -2950,6 +3026,7 @@ static void memb_leave_message_send (struct totemsrp_instance *instance)
> if (instance->totem_config->send_join_timeout) {
> usleep (random() % (instance->totem_config->send_join_timeout * 1000));
> }
> + instance->stats->memb_join_tx++;
>
> totemrrp_mcast_flush_send (
> instance->totemrrp_context,
> @@ -2970,6 +3047,7 @@ static void memb_merge_detect_transmit (struct totemsrp_instance *instance)
> sizeof (struct memb_ring_id));
> assert (memb_merge_detect.header.nodeid);
>
> + instance->stats->memb_merge_detect_tx++;
> totemrrp_mcast_flush_send (instance->totemrrp_context,
> &memb_merge_detect,
> sizeof (struct memb_merge_detect));
> @@ -3152,6 +3230,7 @@ static unsigned int backlog_get (struct totemsrp_instance *instance)
> if (instance->memb_state == MEMB_STATE_RECOVERY) {
> backlog = cs_queue_used (&instance->retrans_message_queue);
> }
> + instance->stats->token[instance->stats->latest_token].backlog_calc = backlog;
> return (backlog);
> }
>
> @@ -3704,10 +3783,12 @@ static int message_handler_mcast (
>
> case MEMB_STATE_COMMIT:
> /* discard message */
> + instance->stats->rx_msg_dropped++;
> break;
>
> case MEMB_STATE_RECOVERY:
> /* discard message */
> + instance->stats->rx_msg_dropped++;
> break;
> }
> return (0);
> @@ -4203,9 +4284,32 @@ void main_deliver_fn (
> if ((int)message_header->type >= totemsrp_message_handlers.count) {
> log_printf (instance->totemsrp_log_level_security, "Type of received message is wrong... ignoring %d.\n", (int)message_header->type);
> printf ("wrong message type\n");
> + instance->stats->rx_msg_dropped++;
> return;
> }
>
> + switch (message_header->type) {
> + case MESSAGE_TYPE_ORF_TOKEN:
> + instance->stats->orf_token_rx++;
> + break;
> + case MESSAGE_TYPE_MCAST:
> + instance->stats->mcast_rx++;
> + break;
> + case MESSAGE_TYPE_MEMB_MERGE_DETECT:
> + instance->stats->memb_merge_detect_rx++;
> + break;
> + case MESSAGE_TYPE_MEMB_JOIN:
> + instance->stats->memb_join_rx++;
> + break;
> + case MESSAGE_TYPE_MEMB_COMMIT_TOKEN:
> + instance->stats->memb_commit_token_rx++;
> + break;
> + case MESSAGE_TYPE_TOKEN_HOLD_CANCEL:
> + instance->stats->token_hold_cancel_rx++;
> + break;
> + default:
> + break;
> + }
> /*
> * Handle incoming message
> */
> diff --git a/exec/totemsrp.h b/exec/totemsrp.h
> index 743aaad..c056723 100644
> --- a/exec/totemsrp.h
> +++ b/exec/totemsrp.h
> @@ -50,6 +50,7 @@ int totemsrp_initialize (
> hdb_handle_t poll_handle,
> void **srp_context,
> struct totem_config *totem_config,
> + totemsrp_stats_t *stats,
>
> void (*deliver_fn) (
> unsigned int nodeid,
> diff --git a/include/corosync/engine/coroapi.h b/include/corosync/engine/coroapi.h
> index b13d652..84b8062 100644
> --- a/include/corosync/engine/coroapi.h
> +++ b/include/corosync/engine/coroapi.h
> @@ -472,6 +472,7 @@ struct corosync_api_v1 {
> const void *),
> const void *data);
>
> + void *(*totem_get_stats)(void);
> /*
> * Totem open process groups API for those service engines
> * wanting their own groups
> @@ -640,6 +641,7 @@ struct corosync_lib_handler {
> struct corosync_exec_handler {
> void (*exec_handler_fn) (const void *msg, unsigned int nodeid);
> void (*exec_endian_convert_fn) (void *msg);
> + hdb_handle_t stats_handle;
> };
This change cannot be made. I'm not sure how this variable is used, but
it breaks binary compatibility with the ABI because service engines
define arrays of corosync_exec_handler statically.
>
> struct corosync_service_engine_iface_ver0 {
> diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h
> index 0ef672d..fa7a465 100644
> --- a/include/corosync/totem/totem.h
> +++ b/include/corosync/totem/totem.h
> @@ -35,6 +35,7 @@
> #ifndef TOTEM_H_DEFINED
> #define TOTEM_H_DEFINED
> #include "totemip.h"
> +#include <corosync/hdb.h>
>
> #ifdef HAVE_SMALL_MEMORY_FOOTPRINT
> #define PROCESSOR_COUNT_MAX 16
> @@ -182,5 +183,74 @@ struct memb_ring_id {
> unsigned long long seq;
> } __attribute__((packed));
>
> +typedef struct {
> + hdb_handle_t handle;
> + int is_dirty;
> + time_t last_updated;
> +} totem_stats_header_t;
> +
> +typedef struct {
> + totem_stats_header_t hdr;
> + uint32_t iface_changes;
> +} totemnet_stats_t;
> +
> +typedef struct {
> + totem_stats_header_t hdr;
> + totemnet_stats_t *net;
> + char *algo_name;
> +} totemrrp_stats_t;
> +
> +
> +typedef struct {
> + uint32_t rx;
> + uint32_t tx;
> + int backlog_calc;
> +} totemsrp_token_stats_t;
> +
> +typedef struct {
> + totem_stats_header_t hdr;
> + totemrrp_stats_t *rrp;
> + uint32_t orf_token_tx;
> + uint32_t orf_token_rx;
> + uint32_t memb_merge_detect_tx;
> + uint32_t memb_merge_detect_rx;
> + uint32_t memb_join_tx;
> + uint32_t memb_join_rx;
> + uint32_t mcast_tx;
> + uint32_t mcast_retx;
> + uint32_t mcast_rx;
> + uint32_t memb_commit_token_tx;
> + uint32_t memb_commit_token_rx;
> + uint32_t token_hold_cancel_tx;
> + uint32_t token_hold_cancel_rx;
> + uint32_t operational_entered;
> + uint32_t operational_token_lost;
> + uint32_t gather_entered;
> + uint32_t gather_token_lost;
> + uint32_t commit_entered;
> + uint32_t commit_token_lost;
> + uint32_t recovery_entered;
> + uint32_t recovery_token_lost;
> + uint32_t consensus_timeouts;
> + uint32_t rx_msg_dropped;
> +
> + void * token_recv_event_handle;
> + void * token_sent_event_handle;
token_recv_event_handle and token_sent_event_handle, if you choose not to
remove them, should be placed in the totemsrp_instance data struct.
> + int earliest_token;
> + int latest_token;
> +#define TOTEM_TOKEN_STATS_MAX 100
> + totemsrp_token_stats_t token[TOTEM_TOKEN_STATS_MAX];
> +
> +} totemsrp_stats_t;
> +
Can you make all stats 64-bit?
> +typedef struct {
> + totem_stats_header_t hdr;
> + totemsrp_stats_t *srp;
> +} totemmrp_stats_t;
> +
> +typedef struct {
> + totem_stats_header_t hdr;
> + totemmrp_stats_t *mrp;
> +} totempg_stats_t;
>
> #endif /* TOTEM_H_DEFINED */
> diff --git a/include/corosync/totem/totempg.h b/include/corosync/totem/totempg.h
> index 4609092..954ead4 100644
> --- a/include/corosync/totem/totempg.h
> +++ b/include/corosync/totem/totempg.h
> @@ -143,6 +143,8 @@ extern int totempg_ifaces_get (
> char ***status,
> unsigned int *iface_count);
>
> +extern totempg_stats_t* totempg_get_stats (void);
> +
> extern const char *totempg_ifaces_print (unsigned int nodeid);
>
> extern unsigned int totempg_my_nodeid_get (void);
More information about the Openais
mailing list