[Openais] event service channel fix
Steven Dake
sdake at mvista.com
Thu Oct 14 15:21:09 PDT 2004
Mark
I think the patch looks pretty good
I have a question further down in the patch if you could take a look.
Something looks a little suspect.
On Thu, 2004-10-14 at 15:11, Mark Haverkamp wrote:
> On Thu, 2004-10-14 at 15:09 -0700, Mark Haverkamp wrote:
> > Here is a fix for the event service. It could lose track channel open
> > counts when a node left. (Also removed some old debug code).
> >
> > Mark.
>
> Use this patch instead. Should have only evt.c changes.
>
>
> ===== exec/evt.c 1.8 vs edited =====
> --- 1.8/exec/evt.c 2004-10-12 14:59:46 -07:00
> +++ edited/exec/evt.c 2004-10-14 15:02:33 -07:00
> @@ -32,8 +32,6 @@
> */
>
> //#define DEBUG
> -//#define EVT_EVENT_LIST_CHECK
> -//#define EVT_ALLOC_CHECK
> #include <sys/types.h>
> #include <malloc.h>
> #include <errno.h>
> @@ -246,7 +244,7 @@
>
> struct open_count {
> SaClmNodeIdT oc_node_id;
> - uint32_t oc_open_count;
> + int32_t oc_open_count;
> };
>
> /*
> @@ -255,8 +253,7 @@
> * esc_channel_name: The name of this channel.
> * esc_total_opens: The total number of opens on this channel including
> * other nodes.
> - * esc_local_opens: The total number of opens on this channel for this
> - * node.
> + * esc_local_opens: The number of opens on this channel for this node.
> * esc_oc_size: The total number of entries in esc_node_opens;
> * esc_node_opens: list of node IDs and how many opens are associated.
> * esc_retained_count: How many retained events for this channel
> @@ -266,8 +263,8 @@
> */
> struct event_svr_channel_instance {
> SaNameT esc_channel_name;
> - uint32_t esc_total_opens;
> - uint32_t esc_local_opens;
> + int32_t esc_total_opens;
> + int32_t esc_local_opens;
> uint32_t esc_oc_size;
> struct open_count *esc_node_opens;
> uint32_t esc_retained_count;
> @@ -540,7 +537,7 @@
> return -1;
> }
> memset(&eci->esc_node_opens[eci->esc_oc_size], 0,
> - total_members - eci->esc_oc_size);
> + sizeof(struct open_count) * (total_members - eci->esc_oc_size));
> eci->esc_oc_size = total_members;
> }
> return 0;
> @@ -548,6 +545,7 @@
>
> /*
> * Find the specified node ID in the node list of the channel.
> + * If it's not in the list, add it.
> */
> static struct open_count* find_open_count(
> struct event_svr_channel_instance *eci,
> @@ -558,6 +556,7 @@
> for (i = 0; i < eci->esc_oc_size; i++) {
> if (eci->esc_node_opens[i].oc_node_id == 0) {
> eci->esc_node_opens[i].oc_node_id = node_id;
> + eci->esc_node_opens[i].oc_open_count = 0;
> }
> if (eci->esc_node_opens[i].oc_node_id == node_id) {
> return &eci->esc_node_opens[i];
> @@ -569,6 +568,23 @@
> return 0;
> }
>
> +static void dump_chan_opens(struct event_svr_channel_instance *eci)
> +{
> + int i;
> + log_printf(LOG_LEVEL_NOTICE,
> + "(EVT) Channel %s, total %d, local %d\n",
> + eci->esc_channel_name.value,
> + eci->esc_total_opens,
> + eci->esc_local_opens);
> + for (i = 0; i < eci->esc_oc_size; i++) {
> + if (eci->esc_node_opens[i].oc_node_id == 0) {
> + break;
> + }
> + log_printf(LOG_LEVEL_NOTICE, "(EVT) Node 0x%x, count %d\n",
> + eci->esc_node_opens[i].oc_node_id,
> + eci->esc_node_opens[i].oc_open_count);
> + }
> +}
>
> /*
> * Replace the current open count for a node with the specified value.
> @@ -585,8 +601,19 @@
>
> oc = find_open_count(eci, node_id);
> if (oc) {
> - eci->esc_total_opens -=
> - (eci->esc_node_opens[i].oc_open_count + open_count);
> + if (oc->oc_open_count) {
> + if (oc->oc_open_count != open_count) {
> + log_printf(LOG_LEVEL_ERROR,
> + "(EVT) Channel open count error\n");
> + dump_chan_opens(eci);
> + }
> + return 0;
> + }
> + log_printf(LOG_LEVEL_DEBUG,
> + "(EVT) Set count: Chan %s for node 0x%x, was %d, now %d\n",
> + eci->esc_channel_name.value,
> + node_id, eci->esc_node_opens[i].oc_open_count, open_count);
> + eci->esc_total_opens += open_count;
> oc->oc_open_count = open_count;
> return 0;
> }
> @@ -607,6 +634,9 @@
> return i;
> }
>
> + if (node_id == my_node_id) {
> + eci->esc_local_opens++;
> + }
> oc = find_open_count(eci, node_id);
> if (oc) {
> eci->esc_total_opens++;
> @@ -631,10 +661,17 @@
> return i;
> }
>
> + if (node_id == my_node_id) {
> + eci->esc_local_opens--;
> + }
> oc = find_open_count(eci, node_id);
> if (oc) {
> eci->esc_total_opens--;
> oc->oc_open_count--;
> + if ((eci->esc_total_opens < 0) || (oc->oc_open_count < 0)) {
> + log_printf(LOG_LEVEL_ERROR, "(EVT) Channel open decrement error\n");
> + dump_chan_opens(eci);
> + }
> return 0;
> }
> return -1;
> @@ -650,7 +687,7 @@
> log_printf(LOG_LEVEL_DEBUG,
> "(EVT) Called Delete channel %s t %d, l %d, r %d\n",
> eci->esc_channel_name.value,
> - eci->esc_total_opens, eci->esc_local_opens,
> + eci->esc_total_opens, eci->esc_local_opens,
> eci->esc_retained_count);
> if ((eci->esc_retained_count == 0) && (eci->esc_total_opens == 0)) {
> log_printf(LOG_LEVEL_DEBUG, "(EVT) Delete channel %s\n",
> @@ -662,6 +699,14 @@
> eci->esc_channel_name.value);
> return;
> }
> +
> + /*
> + * adjust if we're sending open counts on a config change
> + */
> + if (in_cfg_change && (&eci->esc_entry == next_chan)) {
> + next_chan = eci->esc_entry.next;
> + }
> +
> list_del(&eci->esc_entry);
> if (eci->esc_node_opens) {
> free(eci->esc_node_opens);
> @@ -685,25 +730,33 @@
> */
> for (i = 0; i < eci->esc_oc_size; i++) {
> if (eci->esc_node_opens[i].oc_node_id == 0) {
> - eci->esc_node_opens[i].oc_node_id = node_id;
> + break;
> }
> +
> + log_printf(LOG_LEVEL_DEBUG, "(EVT) roc: %x/%x, t %d, oc %d\n",
> + node_id, eci->esc_node_opens[i].oc_node_id,
> + eci->esc_total_opens, eci->esc_node_opens[i].oc_open_count);
> +
> if (eci->esc_node_opens[i].oc_node_id == node_id) {
> +
> eci->esc_total_opens -= eci->esc_node_opens[i].oc_open_count;
> +
> for (j = i+1; j < eci->esc_oc_size; j++, i++) {
> eci->esc_node_opens[i].oc_node_id =
> eci->esc_node_opens[j].oc_node_id;
> eci->esc_node_opens[i].oc_open_count =
> eci->esc_node_opens[j].oc_open_count;
> }
> +
> eci->esc_node_opens[eci->esc_oc_size-1].oc_node_id = 0;
> eci->esc_node_opens[eci->esc_oc_size-1].oc_open_count = 0;
> - }
>
> - /*
> - * Remove the channel if it's not being used anymore
> - */
> - delete_channel(eci);
> - return 0;
> + /*
> + * Remove the channel if it's not being used anymore
> + */
> + delete_channel(eci);
> + return 0;
> + }
> }
> return -1;
> }
> @@ -1174,10 +1227,6 @@
> }
>
>
> -#ifdef EVT_ALLOC_CHECK
> -static uint32_t evt_alloc = 0;
> -static uint32_t evt_free = 0;
> -#endif
>
> /*
> * Free up an event structure if it isn't being used anymore.
> @@ -1194,13 +1243,6 @@
> free(edp->ed_delivered);
> }
>
> -#ifdef EVT_ALLOC_CHECK
> - evt_free++;
> - if ((evt_free % 1000) == 0) {
> - log_printf(LOG_LEVEL_NOTICE, "(EVT) evt alloc: %u, *evt free: %u\n",
> - evt_alloc, evt_free);
> - }
> -#endif
> free(edp);
> }
>
> @@ -1229,7 +1271,9 @@
> * Check to see it the channel isn't in use anymore.
> */
> edp->ed_my_chan->esc_retained_count--;
> - delete_channel(edp->ed_my_chan);
> + if (edp->ed_my_chan->esc_retained_count == 0) {
> + delete_channel(edp->ed_my_chan);
> + }
> free_event_data(edp);
> }
>
> @@ -1271,7 +1315,9 @@
> * Check to see if the channel isn't in use anymore.
> */
> edp->ed_my_chan->esc_retained_count--;
> - delete_channel(edp->ed_my_chan);
> + if (edp->ed_my_chan->esc_retained_count == 0) {
> + delete_channel(edp->ed_my_chan);
> + }
> free_event_data(edp);
> break;
> }
> @@ -1515,13 +1561,6 @@
> list_init(&cel->cel_entry);
> esip->esi_nevents--;
>
> -#ifdef EVT_EVENT_LIST_CHECK
> - if (esip->esi_nevents < 0) {
> - log_printf(LOG_LEVEL_NOTICE,
> - "(EVT) event count went negative\n");
> - esip->esi_nevents = 0;
> - }
> -#endif
> free_event_data(cel->cel_event);
> free(cel);
> next_event:
> @@ -1720,13 +1759,6 @@
> eps++;
> }
>
> -#ifdef EVT_ALLOC_CHECK
> - evt_alloc++;
> - if ((evt_alloc % 1000) == 0) {
> - log_printf(LOG_LEVEL_NOTICE, "(EVT) *evt alloc: %u, evt free: %u\n",
> - evt_alloc, evt_free);
> - }
> -#endif
> ed->ed_ref_count++;
> return ed;
> }
> @@ -2334,15 +2366,6 @@
> esip->esi_queue_blocked = 0;
> log_printf(LOG_LEVEL_DEBUG, "(EVT) unblock\n");
> }
> -#ifdef EVT_EVENT_LIST_CHECK
> - if (esip->esi_nevents < 0) {
> - log_printf(LOG_LEVEL_NOTICE, "(EVT) event count went negative\n");
> - if (!list_empty(&esip->esi_events[i])) {
> - log_printf(LOG_LEVEL_NOTICE, "(EVT) event list isn't empty\n");
> - }
> - esip->esi_nevents = 0;
> - }
> -#endif
> edp = cel->cel_event;
> edp->ed_event.led_lib_channel_handle = cel->cel_chan_handle;
> edp->ed_event.led_sub_id = cel->cel_sub_id;
> @@ -2377,11 +2400,11 @@
> */
> static void remove_chan_open_info(SaClmNodeIdT node_id)
> {
> - struct list_head *l;
> + struct list_head *l, *nxt;
> struct event_svr_channel_instance *eci;
>
> - for (l = esc_head.next; l != &esc_head; l = l->next) {
> -
> + for (l = esc_head.next; l != &esc_head; l = nxt) {
> + nxt = l->next;
> eci = list_entry(l, struct event_svr_channel_instance, esc_entry);
> remove_open_count(eci, node_id);
>
> @@ -2476,7 +2499,7 @@
> while (left_list_entries--) {
> md = evt_find_node(left_list->sin_addr);
> if (md == 0) {
> - log_printf(LOG_LEVEL_DEBUG,
> + log_printf(LOG_LEVEL_WARNING,
> "(EVT) Can't find cluster node at %s\n",
> inet_ntoa(left_list->sin_addr));
> /*
> @@ -2508,7 +2531,7 @@
> */
> if (configuration_type == GMI_CONFIGURATION_REGULAR) {
> if (in_cfg_change) {
> - log_printf(LOG_LEVEL_DEBUG,
> + log_printf(LOG_LEVEL_NOTICE,
> "(EVT) Already in config change, Starting over, m %d, c %d\n",
> total_members, checked_in);
> }
> @@ -2553,23 +2576,6 @@
> saHandleDestroy(&esip->esi_hdb, eco->eco_my_handle);
> }
>
> -#ifdef EVT_EVENT_LIST_CHECK
> -{
> - int i;
> - if (esip->esi_nevents) {
> - log_printf(LOG_LEVEL_WARNING,
> - "(EVT) %d Events left on delivery list after finalize\n",
> - esip->esi_nevents);
> - }
> -
> - for (i = SA_EVT_HIGHEST_PRIORITY; i <= SA_EVT_LOWEST_PRIORITY; i++) {
> - if (!list_empty(&esip->esi_events[i])) {
> - log_printf(LOG_LEVEL_WARNING,
> - "(EVT) Events list not empty after finalize\n");
> - }
> - }
> -}
> -#endif
>
> /*
> * Delete track entry if there is one
> @@ -2675,11 +2681,12 @@
> */
> if (!eci) {
> if (evtpkt->led_retention_time) {
> - eci = create_channel(&evtpkt->led_chan_name);
> + // eci = create_channel(&evtpkt->led_chan_name);
Is eci already defined above here? If so, is the commented out code
needed?
> if (!eci) {
> - log_printf(LOG_LEVEL_WARNING, "(EVT) Can't create channel %s\n",
> + log_printf(LOG_LEVEL_WARNING,
> + "(EVT) Channel %s doesn't exist\n",
> evtpkt->led_chan_name.value);
> -
> + return 0;
> }
> } else {
> return 0;
> @@ -2828,9 +2835,10 @@
> * since we're in recovery mode, so that we can save this message.
> */
> if (!eci) {
> - eci = create_channel(&evtpkt->led_chan_name);
> + // eci = create_channel(&evtpkt->led_chan_name);
(same comment)
Is eci already defined above here? If so, is the commented out code
needed?
> if (!eci) {
> - log_printf(LOG_LEVEL_WARNING, "(EVT) Can't create channel %s\n",
> + log_printf(LOG_LEVEL_WARNING,
> + "(EVT) Channel %s doesn't exist\n",
> evtpkt->led_chan_name.value);
>
> return 0;
> @@ -2999,7 +3007,6 @@
> inc_open_count(eci, mn->mn_node_info.nodeId);
>
> if (mn->mn_node_info.nodeId == my_node->nodeId) {
> - eci->esc_local_opens++;
> /*
> * Complete one of our pending open requests
> */
> @@ -3015,7 +3022,7 @@
> log_printf(LOG_LEVEL_DEBUG,
> "(EVT) Open channel %s t %d, l %d, r %d\n",
> eci->esc_channel_name.value,
> - eci->esc_total_opens, eci->esc_local_opens,
> + eci->esc_total_opens, eci->esc_local_opens,
> eci->esc_retained_count);
> break;
> }
> @@ -3031,18 +3038,19 @@
> if (!eci) {
> log_printf(LOG_LEVEL_NOTICE,
> "(EVT) Channel close request for %s not found\n",
> - cpkt->u.chc_chan);
> + cpkt->u.chc_chan.value);
> break;
> }
>
> - if (mn->mn_node_info.nodeId == my_node->nodeId) {
> - eci->esc_local_opens--;
> - }
> -
> /*
> * if last instance, we can free up assocated data.
> */
> dec_open_count(eci, mn->mn_node_info.nodeId);
> + log_printf(LOG_LEVEL_DEBUG,
> + "(EVT) Close channel %s t %d, l %d, r %d\n",
> + eci->esc_channel_name.value,
> + eci->esc_total_opens, eci->esc_local_opens,
> + eci->esc_retained_count);
> delete_channel(eci);
> break;
>
More information about the Openais
mailing list