[Openais] event service channel fix

Steven Dake sdake at mvista.com
Thu Oct 14 15:21:09 PDT 2004


Mark

I think the patch looks pretty good

I have a question further down in the patch if you could take a look. 
Something looks a little suspect.

On Thu, 2004-10-14 at 15:11, Mark Haverkamp wrote:
> On Thu, 2004-10-14 at 15:09 -0700, Mark Haverkamp wrote:
> > Here is a fix for the event service.  It could lose track channel open
> > counts when a node left.  (Also removed some old debug code).
> > 
> > Mark.
> 
> Use this patch instead.  Should have only evt.c changes.
> 
> 
> ===== exec/evt.c 1.8 vs edited =====
> --- 1.8/exec/evt.c	2004-10-12 14:59:46 -07:00
> +++ edited/exec/evt.c	2004-10-14 15:02:33 -07:00
> @@ -32,8 +32,6 @@
>   */
>  
>  //#define DEBUG
> -//#define EVT_EVENT_LIST_CHECK
> -//#define EVT_ALLOC_CHECK
>  #include <sys/types.h>
>  #include <malloc.h>
>  #include <errno.h>
> @@ -246,7 +244,7 @@
>  
>  struct open_count {
>  	SaClmNodeIdT	oc_node_id;
> -	uint32_t		oc_open_count;
> +	int32_t			oc_open_count;
>  };
>  
>  /*
> @@ -255,8 +253,7 @@
>   * esc_channel_name:	The name of this channel.
>   * esc_total_opens:		The total number of opens on this channel including
>   * 						other nodes.
> - * esc_local_opens:		The total number of opens on this channel for this
> - * 						node.
> + * esc_local_opens:		The number of opens on this channel for this node.
>   * esc_oc_size:			The total number of entries in esc_node_opens;
>   * esc_node_opens:		list of node IDs and how many opens are associated.
>   * esc_retained_count:	How many retained events for this channel
> @@ -266,8 +263,8 @@
>   */
>  struct event_svr_channel_instance {
>  	SaNameT				esc_channel_name;
> -	uint32_t			esc_total_opens;
> -	uint32_t			esc_local_opens;
> +	int32_t				esc_total_opens;
> +	int32_t				esc_local_opens;
>  	uint32_t			esc_oc_size;
>  	struct open_count	*esc_node_opens;
>  	uint32_t			esc_retained_count;
> @@ -540,7 +537,7 @@
>  			return -1;
>  		}
>  		memset(&eci->esc_node_opens[eci->esc_oc_size], 0, 
> -				total_members - eci->esc_oc_size);
> +			sizeof(struct open_count) * (total_members - eci->esc_oc_size));
>  		eci->esc_oc_size = total_members;
>  	}
>  	return 0;
> @@ -548,6 +545,7 @@
>  
>  /*
>   * Find the specified node ID in the node list of the channel.
> + * If it's not in the list, add it.
>   */
>  static struct open_count* find_open_count(
>  		struct event_svr_channel_instance *eci,
> @@ -558,6 +556,7 @@
>  	for (i = 0; i < eci->esc_oc_size; i++) {
>  		if (eci->esc_node_opens[i].oc_node_id == 0) {
>  			eci->esc_node_opens[i].oc_node_id = node_id;
> +			eci->esc_node_opens[i].oc_open_count = 0;
>  		}
>  		if (eci->esc_node_opens[i].oc_node_id == node_id) {
>  			return &eci->esc_node_opens[i];
> @@ -569,6 +568,23 @@
>  	return 0;
>  }
>  
> +static void dump_chan_opens(struct event_svr_channel_instance *eci)
> +{
> +	int i;
> +	log_printf(LOG_LEVEL_NOTICE,
> +			"(EVT) Channel %s, total %d, local %d\n",
> +			eci->esc_channel_name.value,
> +			eci->esc_total_opens,
> +			eci->esc_local_opens);
> +	for (i = 0; i < eci->esc_oc_size; i++) {
> +		if (eci->esc_node_opens[i].oc_node_id == 0) {
> +			break;
> +		}
> +		log_printf(LOG_LEVEL_NOTICE, "(EVT) Node 0x%x, count %d\n",
> +			eci->esc_node_opens[i].oc_node_id, 
> +			eci->esc_node_opens[i].oc_open_count);
> +		}
> +}
>  
>  /*
>   * Replace the current open count for a node with the specified value.
> @@ -585,8 +601,19 @@
>  
>  	oc = find_open_count(eci, node_id);
>  	if (oc) {
> -		eci->esc_total_opens -= 
> -				(eci->esc_node_opens[i].oc_open_count + open_count);
> +		if (oc->oc_open_count) {
> +			if (oc->oc_open_count != open_count) {
> +				log_printf(LOG_LEVEL_ERROR, 
> +						"(EVT) Channel open count error\n");
> +				dump_chan_opens(eci);
> +			}
> +			return 0;
> +		} 
> +		log_printf(LOG_LEVEL_DEBUG, 
> +			"(EVT) Set count: Chan %s for node 0x%x, was %d, now %d\n",
> +			eci->esc_channel_name.value,
> +			node_id, eci->esc_node_opens[i].oc_open_count, open_count);
> +		eci->esc_total_opens += open_count;
>  		oc->oc_open_count = open_count;
>  		return 0;
>  	}
> @@ -607,6 +634,9 @@
>  		return i;
>  	}
>  
> +	if (node_id == my_node_id) {
> +		eci->esc_local_opens++;
> +	}
>  	oc = find_open_count(eci, node_id);
>  	if (oc) {
>  		eci->esc_total_opens++;
> @@ -631,10 +661,17 @@
>  		return i;
>  	}
>  
> +	if (node_id == my_node_id) {
> +		eci->esc_local_opens--;
> +	}
>  	oc = find_open_count(eci, node_id);
>  	if (oc) {
>  		eci->esc_total_opens--;
>  		oc->oc_open_count--;
> +		if ((eci->esc_total_opens < 0) || (oc->oc_open_count < 0)) {
> +			log_printf(LOG_LEVEL_ERROR, "(EVT) Channel open decrement error\n");
> +			dump_chan_opens(eci);
> +		}
>  		return 0;
>  	}
>  	return -1;
> @@ -650,7 +687,7 @@
>  	log_printf(LOG_LEVEL_DEBUG, 
>  			"(EVT) Called Delete channel %s t %d, l %d, r %d\n",
>  			eci->esc_channel_name.value,
> -			eci->esc_total_opens, eci->esc_local_opens, 
> +			eci->esc_total_opens, eci->esc_local_opens,
>  			eci->esc_retained_count);
>  	if ((eci->esc_retained_count == 0)  && (eci->esc_total_opens == 0)) {
>  		log_printf(LOG_LEVEL_DEBUG, "(EVT) Delete channel %s\n",
> @@ -662,6 +699,14 @@
>  					eci->esc_channel_name.value);
>  				return;
>  		}
> +		
> +		/*
> +		 * adjust if we're sending open counts on a config change
> +		 */
> +		if (in_cfg_change && (&eci->esc_entry == next_chan)) {
> +			next_chan = eci->esc_entry.next;
> +		}
> +
>  		list_del(&eci->esc_entry);
>  		if (eci->esc_node_opens) {
>  			free(eci->esc_node_opens);
> @@ -685,25 +730,33 @@
>  	 */
>  	for (i = 0; i < eci->esc_oc_size; i++) {
>  		if (eci->esc_node_opens[i].oc_node_id == 0) {
> -			eci->esc_node_opens[i].oc_node_id = node_id;
> +			break;
>  		}
> +
> +		log_printf(LOG_LEVEL_DEBUG, "(EVT) roc: %x/%x, t %d, oc %d\n",
> +			node_id, eci->esc_node_opens[i].oc_node_id,
> +			eci->esc_total_opens, eci->esc_node_opens[i].oc_open_count);
> +		
>  		if (eci->esc_node_opens[i].oc_node_id == node_id) {
> +
>  			eci->esc_total_opens -= eci->esc_node_opens[i].oc_open_count;
> +
>  			for (j = i+1; j < eci->esc_oc_size; j++, i++) {
>  				eci->esc_node_opens[i].oc_node_id = 
>  				 eci->esc_node_opens[j].oc_node_id;
>  				eci->esc_node_opens[i].oc_open_count = 
>  				 eci->esc_node_opens[j].oc_open_count;
>  			}
> +
>  			eci->esc_node_opens[eci->esc_oc_size-1].oc_node_id = 0;
>  			eci->esc_node_opens[eci->esc_oc_size-1].oc_open_count = 0;
> -		}
>  
> -		/*
> -		 * Remove the channel if it's not being used anymore
> -		 */
> -		delete_channel(eci);
> -		return 0;
> +			/*
> +			 * Remove the channel if it's not being used anymore
> +			 */
> +			delete_channel(eci);
> +			return 0;
> +		}
>  	}
>  	return -1;
>  }
> @@ -1174,10 +1227,6 @@
>  }
>  
> 
> -#ifdef EVT_ALLOC_CHECK
> -static uint32_t evt_alloc = 0;
> -static uint32_t evt_free = 0;
> -#endif
>  
>  /*
>   * Free up an event structure if it isn't being used anymore.
> @@ -1194,13 +1243,6 @@
>  		free(edp->ed_delivered);
>  	}
>  
> -#ifdef EVT_ALLOC_CHECK
> -	evt_free++;
> -	if ((evt_free % 1000) == 0) {
> -			log_printf(LOG_LEVEL_NOTICE, "(EVT) evt alloc: %u, *evt free: %u\n",
> -							evt_alloc, evt_free);
> -	}
> -#endif
>  	free(edp);
>  }
>  
> @@ -1229,7 +1271,9 @@
>  	 * Check to see it the channel isn't in use anymore.
>  	 */
>  	edp->ed_my_chan->esc_retained_count--;
> -	delete_channel(edp->ed_my_chan);
> +	if (edp->ed_my_chan->esc_retained_count == 0) {
> +		delete_channel(edp->ed_my_chan);
> +	}
>  	free_event_data(edp);
>  }
>  
> @@ -1271,7 +1315,9 @@
>  		 * Check to see if the channel isn't in use anymore.
>  		 */
>  		edp->ed_my_chan->esc_retained_count--;
> -		delete_channel(edp->ed_my_chan);
> +		if (edp->ed_my_chan->esc_retained_count == 0) {
> +			delete_channel(edp->ed_my_chan);
> +		}
>  		free_event_data(edp);
>  		break;
>  	}
> @@ -1515,13 +1561,6 @@
>  			 list_init(&cel->cel_entry);
>  			 esip->esi_nevents--;
>  		
> -#ifdef EVT_EVENT_LIST_CHECK
> -			 if (esip->esi_nevents < 0) {
> -				log_printf(LOG_LEVEL_NOTICE, 
> -						"(EVT) event count went negative\n");
> -				esip->esi_nevents = 0;
> -			 }
> -#endif
>  			 free_event_data(cel->cel_event);
>  			 free(cel);
>  next_event:
> @@ -1720,13 +1759,6 @@
>  		eps++;
>  	}
>  
> -#ifdef EVT_ALLOC_CHECK
> -	evt_alloc++;
> -	if ((evt_alloc % 1000) == 0) {
> -			log_printf(LOG_LEVEL_NOTICE, "(EVT) *evt alloc: %u, evt free: %u\n",
> -							evt_alloc, evt_free);
> -	}
> -#endif
>  	ed->ed_ref_count++;
>  	return ed;
>  }
> @@ -2334,15 +2366,6 @@
>  				esip->esi_queue_blocked = 0;
>  				log_printf(LOG_LEVEL_DEBUG, "(EVT) unblock\n");
>  			}
> -#ifdef EVT_EVENT_LIST_CHECK
> -			if (esip->esi_nevents < 0) {
> -				log_printf(LOG_LEVEL_NOTICE, "(EVT) event count went negative\n");
> -				if (!list_empty(&esip->esi_events[i])) {
> -					log_printf(LOG_LEVEL_NOTICE, "(EVT) event list isn't empty\n");
> -				}
> -				esip->esi_nevents = 0;
> -			}
> -#endif
>  			edp = cel->cel_event;
>  			edp->ed_event.led_lib_channel_handle = cel->cel_chan_handle;
>  			edp->ed_event.led_sub_id = cel->cel_sub_id;
> @@ -2377,11 +2400,11 @@
>   */
>  static void remove_chan_open_info(SaClmNodeIdT node_id)
>  {
> -	struct list_head *l;
> +	struct list_head *l, *nxt;
>  	struct event_svr_channel_instance *eci;
>  
> -	for (l = esc_head.next; l != &esc_head; l = l->next) {
> -
> +	for (l = esc_head.next; l != &esc_head; l = nxt) {
> +		nxt = l->next;
>  		eci = list_entry(l, struct event_svr_channel_instance, esc_entry);
>  		remove_open_count(eci, node_id);
>  
> @@ -2476,7 +2499,7 @@
>  	while (left_list_entries--) {
>  		md = evt_find_node(left_list->sin_addr);
>  		if (md == 0) {
> -			log_printf(LOG_LEVEL_DEBUG, 
> +			log_printf(LOG_LEVEL_WARNING, 
>  					"(EVT) Can't find cluster node at %s\n",
>  							inet_ntoa(left_list->sin_addr));
>  		/*
> @@ -2508,7 +2531,7 @@
>  	 */
>  	if (configuration_type == GMI_CONFIGURATION_REGULAR) {
>  		if (in_cfg_change) {
> -			log_printf(LOG_LEVEL_DEBUG, 
> +			log_printf(LOG_LEVEL_NOTICE, 
>  				"(EVT) Already in config change, Starting over, m %d, c %d\n",
>  					total_members, checked_in);
>  		}
> @@ -2553,23 +2576,6 @@
>  		saHandleDestroy(&esip->esi_hdb, eco->eco_my_handle);
>  	}
>  
> -#ifdef EVT_EVENT_LIST_CHECK
> -{
> -	int i;
> -	if (esip->esi_nevents) {
> -		log_printf(LOG_LEVEL_WARNING, 
> -			"(EVT) %d Events left on delivery list after finalize\n", 
> -			esip->esi_nevents);
> -	}
> -
> -	for (i = SA_EVT_HIGHEST_PRIORITY; i <= SA_EVT_LOWEST_PRIORITY; i++) {
> -		if (!list_empty(&esip->esi_events[i])) {
> -			log_printf(LOG_LEVEL_WARNING, 
> -				"(EVT) Events list not empty after finalize\n");
> -		}
> -	}
> -}
> -#endif
>  
>  	/*
>  	 * Delete track entry if there is one
> @@ -2675,11 +2681,12 @@
>  	 */
>  	if (!eci) {
>  		if (evtpkt->led_retention_time) {
> -			eci = create_channel(&evtpkt->led_chan_name);
> +			// eci = create_channel(&evtpkt->led_chan_name);

Is eci already defined above here?  If so, is the commented out code
needed?


>  			if (!eci) {
> -				log_printf(LOG_LEVEL_WARNING, "(EVT) Can't create channel %s\n",
> +				log_printf(LOG_LEVEL_WARNING, 
> +						"(EVT) Channel %s doesn't exist\n",
>  					evtpkt->led_chan_name.value);
> -					
> +				return 0;
>  			}
>  		} else {
>  			return 0;
> @@ -2828,9 +2835,10 @@
>  		 * since we're in recovery mode, so that we can save this message.
>  		 */
>  		if (!eci) {
> -			eci = create_channel(&evtpkt->led_chan_name);
> +			// eci = create_channel(&evtpkt->led_chan_name);

(same comment)
Is eci already defined above here?  If so, is the commented out code
needed?



>  			if (!eci) {
> -				log_printf(LOG_LEVEL_WARNING, "(EVT) Can't create channel %s\n",
> +				log_printf(LOG_LEVEL_WARNING, 
> +						"(EVT) Channel %s doesn't exist\n",
>  					evtpkt->led_chan_name.value);
>  					
>  				return 0;
> @@ -2999,7 +3007,6 @@
>  		inc_open_count(eci, mn->mn_node_info.nodeId);
>  
>  		if (mn->mn_node_info.nodeId == my_node->nodeId) {
> -			eci->esc_local_opens++;
>  			/*
>  			 * Complete one of our pending open requests
>  			 */
> @@ -3015,7 +3022,7 @@
>  		log_printf(LOG_LEVEL_DEBUG, 
>  				"(EVT) Open channel %s t %d, l %d, r %d\n",
>  				eci->esc_channel_name.value,
> -				eci->esc_total_opens, eci->esc_local_opens, 
> +				eci->esc_total_opens, eci->esc_local_opens,
>  				eci->esc_retained_count);
>  		break;
>  	}
> @@ -3031,18 +3038,19 @@
>  		if (!eci) {
>  			log_printf(LOG_LEVEL_NOTICE, 
>  					"(EVT) Channel close request for %s not found\n",
> -				cpkt->u.chc_chan);	
> +				cpkt->u.chc_chan.value);	
>  			break;
>  		}
>  
> -		if (mn->mn_node_info.nodeId == my_node->nodeId) {
> -			eci->esc_local_opens--;
> -		}
> -
>  		/*
>  		 * if last instance, we can free up assocated data.
>  		 */
>  		dec_open_count(eci, mn->mn_node_info.nodeId);
> +		log_printf(LOG_LEVEL_DEBUG, 
> +				"(EVT) Close channel %s t %d, l %d, r %d\n",
> +				eci->esc_channel_name.value,
> +				eci->esc_total_opens, eci->esc_local_opens,
> +				eci->esc_retained_count);
>  		delete_channel(eci);
>  		break;
>  




More information about the Openais mailing list