[Openais] event service channel fix
Mark Haverkamp
markh at osdl.org
Thu Oct 14 15:11:00 PDT 2004
On Thu, 2004-10-14 at 15:09 -0700, Mark Haverkamp wrote:
> Here is a fix for the event service. It could lose track channel open
> counts when a node left. (Also removed some old debug code).
>
> Mark.
Use this patch instead. Should have only evt.c changes.
===== exec/evt.c 1.8 vs edited =====
--- 1.8/exec/evt.c 2004-10-12 14:59:46 -07:00
+++ edited/exec/evt.c 2004-10-14 15:02:33 -07:00
@@ -32,8 +32,6 @@
*/
//#define DEBUG
-//#define EVT_EVENT_LIST_CHECK
-//#define EVT_ALLOC_CHECK
#include <sys/types.h>
#include <malloc.h>
#include <errno.h>
@@ -246,7 +244,7 @@
struct open_count {
SaClmNodeIdT oc_node_id;
- uint32_t oc_open_count;
+ int32_t oc_open_count;
};
/*
@@ -255,8 +253,7 @@
* esc_channel_name: The name of this channel.
* esc_total_opens: The total number of opens on this channel including
* other nodes.
- * esc_local_opens: The total number of opens on this channel for this
- * node.
+ * esc_local_opens: The number of opens on this channel for this node.
* esc_oc_size: The total number of entries in esc_node_opens;
* esc_node_opens: list of node IDs and how many opens are associated.
* esc_retained_count: How many retained events for this channel
@@ -266,8 +263,8 @@
*/
struct event_svr_channel_instance {
SaNameT esc_channel_name;
- uint32_t esc_total_opens;
- uint32_t esc_local_opens;
+ int32_t esc_total_opens;
+ int32_t esc_local_opens;
uint32_t esc_oc_size;
struct open_count *esc_node_opens;
uint32_t esc_retained_count;
@@ -540,7 +537,7 @@
return -1;
}
memset(&eci->esc_node_opens[eci->esc_oc_size], 0,
- total_members - eci->esc_oc_size);
+ sizeof(struct open_count) * (total_members - eci->esc_oc_size));
eci->esc_oc_size = total_members;
}
return 0;
@@ -548,6 +545,7 @@
/*
* Find the specified node ID in the node list of the channel.
+ * If it's not in the list, add it.
*/
static struct open_count* find_open_count(
struct event_svr_channel_instance *eci,
@@ -558,6 +556,7 @@
for (i = 0; i < eci->esc_oc_size; i++) {
if (eci->esc_node_opens[i].oc_node_id == 0) {
eci->esc_node_opens[i].oc_node_id = node_id;
+ eci->esc_node_opens[i].oc_open_count = 0;
}
if (eci->esc_node_opens[i].oc_node_id == node_id) {
return &eci->esc_node_opens[i];
@@ -569,6 +568,23 @@
return 0;
}
+static void dump_chan_opens(struct event_svr_channel_instance *eci)
+{
+ int i;
+ log_printf(LOG_LEVEL_NOTICE,
+ "(EVT) Channel %s, total %d, local %d\n",
+ eci->esc_channel_name.value,
+ eci->esc_total_opens,
+ eci->esc_local_opens);
+ for (i = 0; i < eci->esc_oc_size; i++) {
+ if (eci->esc_node_opens[i].oc_node_id == 0) {
+ break;
+ }
+ log_printf(LOG_LEVEL_NOTICE, "(EVT) Node 0x%x, count %d\n",
+ eci->esc_node_opens[i].oc_node_id,
+ eci->esc_node_opens[i].oc_open_count);
+ }
+}
/*
* Replace the current open count for a node with the specified value.
@@ -585,8 +601,19 @@
oc = find_open_count(eci, node_id);
if (oc) {
- eci->esc_total_opens -=
- (eci->esc_node_opens[i].oc_open_count + open_count);
+ if (oc->oc_open_count) {
+ if (oc->oc_open_count != open_count) {
+ log_printf(LOG_LEVEL_ERROR,
+ "(EVT) Channel open count error\n");
+ dump_chan_opens(eci);
+ }
+ return 0;
+ }
+ log_printf(LOG_LEVEL_DEBUG,
+ "(EVT) Set count: Chan %s for node 0x%x, was %d, now %d\n",
+ eci->esc_channel_name.value,
+ node_id, eci->esc_node_opens[i].oc_open_count, open_count);
+ eci->esc_total_opens += open_count;
oc->oc_open_count = open_count;
return 0;
}
@@ -607,6 +634,9 @@
return i;
}
+ if (node_id == my_node_id) {
+ eci->esc_local_opens++;
+ }
oc = find_open_count(eci, node_id);
if (oc) {
eci->esc_total_opens++;
@@ -631,10 +661,17 @@
return i;
}
+ if (node_id == my_node_id) {
+ eci->esc_local_opens--;
+ }
oc = find_open_count(eci, node_id);
if (oc) {
eci->esc_total_opens--;
oc->oc_open_count--;
+ if ((eci->esc_total_opens < 0) || (oc->oc_open_count < 0)) {
+ log_printf(LOG_LEVEL_ERROR, "(EVT) Channel open decrement error\n");
+ dump_chan_opens(eci);
+ }
return 0;
}
return -1;
@@ -650,7 +687,7 @@
log_printf(LOG_LEVEL_DEBUG,
"(EVT) Called Delete channel %s t %d, l %d, r %d\n",
eci->esc_channel_name.value,
- eci->esc_total_opens, eci->esc_local_opens,
+ eci->esc_total_opens, eci->esc_local_opens,
eci->esc_retained_count);
if ((eci->esc_retained_count == 0) && (eci->esc_total_opens == 0)) {
log_printf(LOG_LEVEL_DEBUG, "(EVT) Delete channel %s\n",
@@ -662,6 +699,14 @@
eci->esc_channel_name.value);
return;
}
+
+ /*
+ * adjust if we're sending open counts on a config change
+ */
+ if (in_cfg_change && (&eci->esc_entry == next_chan)) {
+ next_chan = eci->esc_entry.next;
+ }
+
list_del(&eci->esc_entry);
if (eci->esc_node_opens) {
free(eci->esc_node_opens);
@@ -685,25 +730,33 @@
*/
for (i = 0; i < eci->esc_oc_size; i++) {
if (eci->esc_node_opens[i].oc_node_id == 0) {
- eci->esc_node_opens[i].oc_node_id = node_id;
+ break;
}
+
+ log_printf(LOG_LEVEL_DEBUG, "(EVT) roc: %x/%x, t %d, oc %d\n",
+ node_id, eci->esc_node_opens[i].oc_node_id,
+ eci->esc_total_opens, eci->esc_node_opens[i].oc_open_count);
+
if (eci->esc_node_opens[i].oc_node_id == node_id) {
+
eci->esc_total_opens -= eci->esc_node_opens[i].oc_open_count;
+
for (j = i+1; j < eci->esc_oc_size; j++, i++) {
eci->esc_node_opens[i].oc_node_id =
eci->esc_node_opens[j].oc_node_id;
eci->esc_node_opens[i].oc_open_count =
eci->esc_node_opens[j].oc_open_count;
}
+
eci->esc_node_opens[eci->esc_oc_size-1].oc_node_id = 0;
eci->esc_node_opens[eci->esc_oc_size-1].oc_open_count = 0;
- }
- /*
- * Remove the channel if it's not being used anymore
- */
- delete_channel(eci);
- return 0;
+ /*
+ * Remove the channel if it's not being used anymore
+ */
+ delete_channel(eci);
+ return 0;
+ }
}
return -1;
}
@@ -1174,10 +1227,6 @@
}
-#ifdef EVT_ALLOC_CHECK
-static uint32_t evt_alloc = 0;
-static uint32_t evt_free = 0;
-#endif
/*
* Free up an event structure if it isn't being used anymore.
@@ -1194,13 +1243,6 @@
free(edp->ed_delivered);
}
-#ifdef EVT_ALLOC_CHECK
- evt_free++;
- if ((evt_free % 1000) == 0) {
- log_printf(LOG_LEVEL_NOTICE, "(EVT) evt alloc: %u, *evt free: %u\n",
- evt_alloc, evt_free);
- }
-#endif
free(edp);
}
@@ -1229,7 +1271,9 @@
* Check to see it the channel isn't in use anymore.
*/
edp->ed_my_chan->esc_retained_count--;
- delete_channel(edp->ed_my_chan);
+ if (edp->ed_my_chan->esc_retained_count == 0) {
+ delete_channel(edp->ed_my_chan);
+ }
free_event_data(edp);
}
@@ -1271,7 +1315,9 @@
* Check to see if the channel isn't in use anymore.
*/
edp->ed_my_chan->esc_retained_count--;
- delete_channel(edp->ed_my_chan);
+ if (edp->ed_my_chan->esc_retained_count == 0) {
+ delete_channel(edp->ed_my_chan);
+ }
free_event_data(edp);
break;
}
@@ -1515,13 +1561,6 @@
list_init(&cel->cel_entry);
esip->esi_nevents--;
-#ifdef EVT_EVENT_LIST_CHECK
- if (esip->esi_nevents < 0) {
- log_printf(LOG_LEVEL_NOTICE,
- "(EVT) event count went negative\n");
- esip->esi_nevents = 0;
- }
-#endif
free_event_data(cel->cel_event);
free(cel);
next_event:
@@ -1720,13 +1759,6 @@
eps++;
}
-#ifdef EVT_ALLOC_CHECK
- evt_alloc++;
- if ((evt_alloc % 1000) == 0) {
- log_printf(LOG_LEVEL_NOTICE, "(EVT) *evt alloc: %u, evt free: %u\n",
- evt_alloc, evt_free);
- }
-#endif
ed->ed_ref_count++;
return ed;
}
@@ -2334,15 +2366,6 @@
esip->esi_queue_blocked = 0;
log_printf(LOG_LEVEL_DEBUG, "(EVT) unblock\n");
}
-#ifdef EVT_EVENT_LIST_CHECK
- if (esip->esi_nevents < 0) {
- log_printf(LOG_LEVEL_NOTICE, "(EVT) event count went negative\n");
- if (!list_empty(&esip->esi_events[i])) {
- log_printf(LOG_LEVEL_NOTICE, "(EVT) event list isn't empty\n");
- }
- esip->esi_nevents = 0;
- }
-#endif
edp = cel->cel_event;
edp->ed_event.led_lib_channel_handle = cel->cel_chan_handle;
edp->ed_event.led_sub_id = cel->cel_sub_id;
@@ -2377,11 +2400,11 @@
*/
static void remove_chan_open_info(SaClmNodeIdT node_id)
{
- struct list_head *l;
+ struct list_head *l, *nxt;
struct event_svr_channel_instance *eci;
- for (l = esc_head.next; l != &esc_head; l = l->next) {
-
+ for (l = esc_head.next; l != &esc_head; l = nxt) {
+ nxt = l->next;
eci = list_entry(l, struct event_svr_channel_instance, esc_entry);
remove_open_count(eci, node_id);
@@ -2476,7 +2499,7 @@
while (left_list_entries--) {
md = evt_find_node(left_list->sin_addr);
if (md == 0) {
- log_printf(LOG_LEVEL_DEBUG,
+ log_printf(LOG_LEVEL_WARNING,
"(EVT) Can't find cluster node at %s\n",
inet_ntoa(left_list->sin_addr));
/*
@@ -2508,7 +2531,7 @@
*/
if (configuration_type == GMI_CONFIGURATION_REGULAR) {
if (in_cfg_change) {
- log_printf(LOG_LEVEL_DEBUG,
+ log_printf(LOG_LEVEL_NOTICE,
"(EVT) Already in config change, Starting over, m %d, c %d\n",
total_members, checked_in);
}
@@ -2553,23 +2576,6 @@
saHandleDestroy(&esip->esi_hdb, eco->eco_my_handle);
}
-#ifdef EVT_EVENT_LIST_CHECK
-{
- int i;
- if (esip->esi_nevents) {
- log_printf(LOG_LEVEL_WARNING,
- "(EVT) %d Events left on delivery list after finalize\n",
- esip->esi_nevents);
- }
-
- for (i = SA_EVT_HIGHEST_PRIORITY; i <= SA_EVT_LOWEST_PRIORITY; i++) {
- if (!list_empty(&esip->esi_events[i])) {
- log_printf(LOG_LEVEL_WARNING,
- "(EVT) Events list not empty after finalize\n");
- }
- }
-}
-#endif
/*
* Delete track entry if there is one
@@ -2675,11 +2681,12 @@
*/
if (!eci) {
if (evtpkt->led_retention_time) {
- eci = create_channel(&evtpkt->led_chan_name);
+ // eci = create_channel(&evtpkt->led_chan_name);
if (!eci) {
- log_printf(LOG_LEVEL_WARNING, "(EVT) Can't create channel %s\n",
+ log_printf(LOG_LEVEL_WARNING,
+ "(EVT) Channel %s doesn't exist\n",
evtpkt->led_chan_name.value);
-
+ return 0;
}
} else {
return 0;
@@ -2828,9 +2835,10 @@
* since we're in recovery mode, so that we can save this message.
*/
if (!eci) {
- eci = create_channel(&evtpkt->led_chan_name);
+ // eci = create_channel(&evtpkt->led_chan_name);
if (!eci) {
- log_printf(LOG_LEVEL_WARNING, "(EVT) Can't create channel %s\n",
+ log_printf(LOG_LEVEL_WARNING,
+ "(EVT) Channel %s doesn't exist\n",
evtpkt->led_chan_name.value);
return 0;
@@ -2999,7 +3007,6 @@
inc_open_count(eci, mn->mn_node_info.nodeId);
if (mn->mn_node_info.nodeId == my_node->nodeId) {
- eci->esc_local_opens++;
/*
* Complete one of our pending open requests
*/
@@ -3015,7 +3022,7 @@
log_printf(LOG_LEVEL_DEBUG,
"(EVT) Open channel %s t %d, l %d, r %d\n",
eci->esc_channel_name.value,
- eci->esc_total_opens, eci->esc_local_opens,
+ eci->esc_total_opens, eci->esc_local_opens,
eci->esc_retained_count);
break;
}
@@ -3031,18 +3038,19 @@
if (!eci) {
log_printf(LOG_LEVEL_NOTICE,
"(EVT) Channel close request for %s not found\n",
- cpkt->u.chc_chan);
+ cpkt->u.chc_chan.value);
break;
}
- if (mn->mn_node_info.nodeId == my_node->nodeId) {
- eci->esc_local_opens--;
- }
-
/*
* if last instance, we can free up assocated data.
*/
dec_open_count(eci, mn->mn_node_info.nodeId);
+ log_printf(LOG_LEVEL_DEBUG,
+ "(EVT) Close channel %s t %d, l %d, r %d\n",
+ eci->esc_channel_name.value,
+ eci->esc_total_opens, eci->esc_local_opens,
+ eci->esc_retained_count);
delete_channel(eci);
break;
--
Mark Haverkamp <markh at osdl.org>
More information about the Openais
mailing list