[Openais] Re: A bug for AMF

Steven Dake sdake at mvista.com
Sat Oct 2 14:13:15 PDT 2004


Miyotaka-san
comments inline

On Sat, 2004-10-02 at 13:27, Miyotaka Sakai wrote:
> steve,
> 
> I've found a bug for AMF and made a patch .
> I would like you to review this patch .
> 
> Several gmi_mcast messages couldn't be delivered .
> I changed gmi_mcast's message priorty from GMI_PRIO_HIGH to
> GMI_PRIO_RECOVERY ,then messages could be deliverd .
> 
> If you have another idea, Please tel me.
> 
> This patch add several mechanisms to ease to debug .
> ( a mechanism is watching AmfComponent contents )
> 

The changing of priorities needs a different solution...  I think I
messed this up for you when I added the recovery plug.  Could you fix it
up?

The way it should work is this:
messages sent during recovery (within the amf_confchg_fn function and
all the other messages related to them) should be sent at recovery
priority.  messages sent during normal operation should retain their
current priorities.  Since the state machine must now (with the recovery
plug) send messages at two different priorities depending on what state
the system is in, we need to keep track of the priorities.  It is
possible for the state machine to know the priority at which it should
send messages by placing the priority in the first message that starts
the state transitions.  Then every message that triggers a new message
to be sent should encode the priority of the message to the next state. 
If you need more details please ask.

It is possible that the output queues used in gmi_mcast are full.  In
this case, it is necessary to use gmi_token_callback_create.  Mark
Haverkamp has implemented this in the event service if you need an
example of using it.  This callback will be called the next time it may
be possible to send messages.  The callback should then resend the state
machine message that failed to send on an earlier try.

Why the signal stuff in main.c?

The added debug output looks fine.  Please try to use log_printf instead
of printf though.  Could you check in just the debug output as a
seperate commit after addressing the comment above?

Thanks
-steve

> If you accept this patch ,I'll check-in .
> Thanks !
> - Miyotaka Sakai.
> 
> 
> 
> ______________________________________________________________________
> --- openais.2004-10-01.21.30/exec/main.c	2004-10-02 13:30:03.000000000 +0900
> +++ openais/exec/main.c	2004-10-03 04:12:53.000000000 +0900
> @@ -143,6 +143,19 @@
>  	return (conn_info);
>  }
>  
> +static void aisexec_sigusr2 ( )
> +{
> +	amf_dump ();
> +
> +	signal (SIGUSR2 ,aisexec_sigusr2);
> +	return;
> +}
> +
> +static void aisexec_signal ( )
> +{
> +	signal (SIGUSR2 ,aisexec_sigusr2);
> +	return;
> +}
>  
>  struct sockaddr_in this_ip;
>  #define LOCALHOST_IP inet_addr("127.0.0.1")
> @@ -873,6 +886,8 @@
>  
>  	aisexec_poll_handle = poll_create ();
>  
> +	aisexec_signal ( );
> +
>  	/*
>  	 * if gmi_init doesn't have root priveleges, it cannot
>  	 * bind to a specific interface.  This only matters if
> --- openais.2004-10-01.21.30/exec/amf.c	2004-10-02 13:30:03.000000000 +0900
> +++ openais/exec/amf.c	2004-10-03 04:13:58.000000000 +0900
> @@ -394,6 +394,7 @@
>  	.libais_handlers			=	amf_libais_handlers,
>  	.libais_handlers_count		= sizeof (amf_libais_handlers) / sizeof (struct libais_handler),
>  	.aisexec_handler_fns		= amf_aisexec_handler_fns,
> +
>  	.aisexec_handler_fns_count	= sizeof (amf_aisexec_handler_fns) / sizeof (int (*)),
>  	.confchg_fn					= amf_confchg_fn,
>  	.libais_init_fn				= message_handler_req_amf_init,
> @@ -509,7 +510,7 @@
>  	iovecs[0].iov_base = (char *)&req_exec_amf_componentunregister;
>  	iovecs[0].iov_len = sizeof (req_exec_amf_componentunregister);
>  
> -	assert (gmi_mcast (&aisexec_groupname, iovecs, 1, GMI_PRIO_MED) == 0);
> +	assert (gmi_mcast (&aisexec_groupname, iovecs, 1, GMI_PRIO_RECOVERY) == 0);
>  }
>  
>  static void component_register (
> @@ -526,7 +527,6 @@
>  	}
>  	log_printf (LOG_LEVEL_DEBUG, "component_register: registering component %s\n",
>  		getSaNameT (&component->name));
> -	component->probableCause = SA_AMF_NOT_RESPONDING;
>  
>  	req_exec_amf_componentregister.header.size = sizeof (struct req_exec_amf_componentregister);
>  	req_exec_amf_componentregister.header.id = MESSAGE_REQ_EXEC_AMF_COMPONENTREGISTER;
> @@ -766,7 +766,7 @@
>  	iovecs[0].iov_base = (char *)&req_exec_amf_hastateset;
>  	iovecs[0].iov_len = sizeof (req_exec_amf_hastateset);
>  
> -	assert (gmi_mcast (&aisexec_groupname, iovecs, 1, GMI_PRIO_HIGH) == 0);
> +	assert (gmi_mcast (&aisexec_groupname, iovecs, 1, GMI_PRIO_RECOVERY) == 0);
>  }
>  
>  void readinessStateSetApi (struct saAmfComponent *component,
> @@ -857,7 +857,7 @@
>  	iovecs[0].iov_base = (char *)&req_exec_amf_readinessstateset;
>  	iovecs[0].iov_len = sizeof (req_exec_amf_readinessstateset);
>  
> -	assert (gmi_mcast (&aisexec_groupname, iovecs, 1, GMI_PRIO_HIGH) == 0);
> +	assert (gmi_mcast (&aisexec_groupname, iovecs, 1, GMI_PRIO_RECOVERY) == 0);
>  }
>  
>  #ifdef CMOPILE_OUT
> @@ -1266,7 +1266,7 @@
>  			/* noop - operational state */
>  			break;
>  		case AMF_ENABLED_UNLOCKED_STANDBY_REQUESTED:
> -			dsmEnabledUnlockedActiveRequested (component);
> +			dsmEnabledUnlockedStandbyRequested (component);
>  			break;
>  		case AMF_ENABLED_UNLOCKED_STANDBY_COMPLETED:
>  			/* noop - operational state */
> @@ -1699,7 +1699,7 @@
>  	if (component->source_addr.s_addr != source_addr->s_addr) {
>  		return;
>  	}
> -		
> +
>  	component->local = 1;
>  	component_unregister (component);
>  
> @@ -2474,3 +2474,114 @@
>  	return (0);
>  }
>  
> +static char *amf_disabledunlockedstate_ntoa (state)
> +{
> +	static char str_disabledstate[16];
> +	if (state == AMF_DISABLED_UNLOCKED_REGISTEREDORERRORCANCEL) {
> +		sprintf (str_disabledstate, "AMF_DISABLED_UNLOCKED_REGISTEREDORERRORCANCEL(%d)", state);
> +	} else if (state == AMF_DISABLED_UNLOCKED_FAILED) {
> +		sprintf (str_disabledstate, "AMF_DISABLED_UNLOCKED_FAILED(%d)", state);
> +	} else if (state == AMF_DISABLED_UNLOCKED_QUIESCED_REQUESTED) {
> +		sprintf (str_disabledstate, "AMF_DISABLED_UNLOCKED_QUIESCED_REQUESTED(%d)", state);
> +	} else if (state == AMF_DISABLED_UNLOCKED_QUIESCED_COMPLETED) {
> +		sprintf (str_disabledstate, "AMF_DISABLED_UNLOCKED_QUIESCED_COMPLETED(%d)", state);
> +	} else if (state == AMF_DISABLED_UNLOCKED_OUT_OF_SERVICE_REQUESTED) {
> +		sprintf (str_disabledstate, "AMF_DISABLED_UNLOCKED_OUT_OF_SERVICE_REQUESTED(%d)", state);
> +	} else if (state == AMF_DISABLED_UNLOCKED_OUT_OF_SERVICE_COMPLETED) {
> +		sprintf (str_disabledstate, "AMF_DISABLED_UNLOCKED_OUT_OF_SERVICE_COMPLETED(%d)", state);
> +	} else {
> +		sprintf (str_disabledstate, "Unkown(%d)", state);
> +	}
> +
> +	return (str_disabledstate);
> +}
> +
> +static char *amf_enabledunlockedstate_ntoa (state)
> +{
> +	static char str_enabledunlockedstate[16];
> +	if (state == AMF_ENABLED_UNLOCKED_INITIAL) {
> +		sprintf (str_enabledunlockedstate, "AMF_ENABLED_UNLOCKED_INITIAL(%d)", state);
> +	} else if (state == AMF_ENABLED_UNLOCKED_IN_SERVICE_REQUESTED) {
> +		sprintf (str_enabledunlockedstate, "AMF_ENABLED_UNLOCKED_IN_SERVICE_REQUESTED(%d)", state);
> +	} else if (state == AMF_ENABLED_UNLOCKED_IN_SERVICE_COMPLETED) {
> +		sprintf (str_enabledunlockedstate, "AMF_ENABLED_UNLOCKED_IN_SERVICE_COMPLETED(%d)", state);
> +	} else if (state == AMF_ENABLED_UNLOCKED_ACTIVE_REQUESTED) {
> +		sprintf (str_enabledunlockedstate, "AMF_ENABLED_UNLOCKED_ACTIVE_REQUESTED(%d)", state);
> +	} else if (state == AMF_ENABLED_UNLOCKED_ACTIVE_COMPLETED) {
> +		sprintf (str_enabledunlockedstate, "AMF_ENABLED_UNLOCKED_ACTIVE_COMPLETED(%d)", state);
> +	} else if (state == AMF_ENABLED_UNLOCKED_STANDBY_REQUESTED) {
> +		sprintf (str_enabledunlockedstate, "AMF_ENABLED_UNLOCKED_STANDBY_REQUESTED(%d)", state);
> +	} else if (state == AMF_ENABLED_UNLOCKED_STANDBY_COMPLETED) {
> +		sprintf (str_enabledunlockedstate, "AMF_ENABLED_UNLOCKED_STANDBY_COMPLETED(%d)", state);
> +	} else {
> +		sprintf (str_enabledunlockedstate, "Unkown(%d)", state);
> +	}
> +
> +	return (str_enabledunlockedstate);
> +}
> +
> +static char *amf_readinessstate_ntoa (readinessstate)
> +{
> +	static char str_readiness[16];
> +	if (readinessstate == SA_AMF_OUT_OF_SERVICE) {
> +		sprintf (str_readiness, "SA_AMF_OUT_OF_SERVICE(%d)", readinessstate);
> +	} else if (readinessstate == SA_AMF_IN_SERVICE) {
> +		sprintf (str_readiness, "SA_AMF_IN_SERVICE(%d)", readinessstate);
> +	} else if (readinessstate == SA_AMF_STOPPING) {
> +		sprintf (str_readiness, "SA_AMF_STOPPING(%d)", readinessstate);
> +	} else {
> +		sprintf (str_readiness, "Unkown (%d)", readinessstate);
> +	}
> +
> +	return (str_readiness);
> +}
> +
> +static char *amf_hastate_ntoa (hastate)
> +{
> +	static char str_hastate[16];
> +	if (hastate == SA_AMF_ACTIVE) {
> +		sprintf (str_hastate, "SA_AMF_ACTIVE(%d)", hastate);
> +	} else if (hastate == SA_AMF_STANDBY) {
> +		sprintf (str_hastate, "SA_AMF_STANDBY(%d)", hastate);
> +	} else if (hastate == SA_AMF_QUIESCED) {
> +		sprintf (str_hastate, "SA_AMF_QUIESCED(%d)", hastate);
> +	} else {
> +		sprintf (str_hastate, "Unkown (%d)", hastate);
> +	}
> +
> +	return (str_hastate);
> +}
> +
> +static void amf_dump_comp (struct saAmfComponent *component ,void *data)
> +{
> +	char name[64];
> +	data = NULL;
> +	printf ("----------------\n" );
> +	printf ("registered            = %d\n" ,component->registered);
> +	printf ("local                 = %d\n" ,component->local );
> +	printf ("source_addr           = %s\n" ,inet_ntoa (component->source_addr));
> +	memset (name, 0 , sizeof(name));
> +	memcpy (name, component->name.value, component->name.length);
> +	printf ("name                  = %s\n" ,name );
> +	printf ("currentReadinessState = %s\n" ,amf_readinessstate_ntoa (component->currentReadinessState));
> +	printf ("newReadinessState     = %s\n" ,amf_readinessstate_ntoa (component->newReadinessState));
> +	printf ("currentHAState        = %s\n" ,amf_hastate_ntoa (component->currentHAState));
> +	printf ("newHAState            = %s\n" ,amf_hastate_ntoa (component->newHAState));
> +	printf ("enabledUnlockedState  = %s\n" ,amf_enabledunlockedstate_ntoa (component->enabledUnlockedState));
> +	printf ("disabledUnlockedState = %s\n" ,amf_disabledunlockedstate_ntoa (component->disabledUnlockedState));
> +	printf ("probableCause         = %d\n" ,component->probableCause );
> +}
> +
> +void amf_dump ( )
> +{
> +	pid_t pid;
> +
> +	pid = fork ();
> +	if (pid == 0) {
> +		enumerate_components (amf_dump_comp, NULL);
> +		fflush (stdout);
> +		exit (0);
> +	}
> +
> +	return;
> +}
> --- openais.2004-10-01.21.30/exec/amf.h	2004-10-02 13:30:03.000000000 +0900
> +++ openais/exec/amf.h	2004-10-03 04:00:13.000000000 +0900
> @@ -52,6 +52,7 @@
>  	int trackActive;
>  };
>  
> +void amf_dump ();
>  extern struct service_handler amf_service_handler;
>  
>  #endif /* AMF_H_DEFINED */




More information about the Openais mailing list