[Openais] Patch AMF sync #4

Hans Feldt Hans.Feldt at ericsson.com
Tue Sep 5 00:35:39 PDT 2006


Committed revision 1236.

Hans Feldt wrote:
> This patch includes:
> 
> - "No need for DNS or /etc/hosts"
> 
> The call to gethostbyaddr() has been removed. It is replaced by a
> protocol in which each node multicasts its own hostname (obtained with
> gethostname()).
> 
> - "Logical AMF nodes"
> 
> The AMF node name is no longer a hostname. Instead, the saAmfNodeClmNode
> configuration attribute of the AMF node now holds the hostname, and this
> attribute is now mandatory. The amf.conf diff below shows the required
> changes.
> 
> - Some other AMF sync bug fixes
> 
> 
> This patch is probably in conflict with the one sent by Lars earlier. We 
> will sort that out before commit.
> 
> FYI, coming up is an amf.conf man page.
> 
> Regards,
> Hans
> 
> 
> ------------------------------------------------------------------------
> 
> Index: exec/amfnode.c
> ===================================================================
> --- exec/amfnode.c	(revision 1232)
> +++ exec/amfnode.c	(working copy)
> @@ -119,8 +119,8 @@
>  static void amf_node_acsm_enter_leaving_spontaneously(struct amf_node *node)
>  {
>  	ENTER("'%s'", node->name.value);
> -	node->synchronized = FALSE;
>  	node->saAmfNodeOperState = SA_AMF_OPERATIONAL_DISABLED;
> +	node->nodeid = 0;
>  }
>  
>  static void amf_node_acsm_enter_failing_over (struct amf_node *node)
> @@ -160,10 +160,10 @@
>   */
>  void amf_node_leave (struct amf_node *node)
>  {
> -	ENTER("'%s'", node->name.value);
> +	ENTER("'%s', CLM node '%s'", node->name.value,
> +		node->saAmfNodeClmNode.value);
>  	amf_node_acsm_enter_leaving_spontaneously(node);    
>  	amf_node_acsm_enter_failing_over (node);
> -
>  }
>  
>  /**
> @@ -297,19 +297,15 @@
>  {
>  	struct amf_node *node;
>  
> -	if (amf_cluster == NULL) {
> -		return NULL;
> -	}
> +	assert (name != NULL && amf_cluster != NULL);
>  
> -	assert (name != NULL);
> -
>  	for (node = amf_cluster->node_head; node != NULL; node = node->next) {
>  		if (name_match (&node->name, name)) {
>  			return node;
>  		}
>  	}
>  
> -	dprintf ("node %s not found!", name->value);
> +	dprintf ("node %s not found in configuration!", name->value);
>  
>  	return NULL;
>  }
> @@ -318,17 +314,36 @@
>  {
>  	struct amf_node *node;
>  
> +	assert (amf_cluster != NULL);
> +
>  	for (node = amf_cluster->node_head; node != NULL; node = node->next) {
>  		if (node->nodeid == nodeid) {
>  			return node;
>  		}
>  	}
>  
> -	dprintf ("node %u not found!", nodeid);
> +	dprintf ("node %u not found in configuration!", nodeid);
>  
>  	return NULL;
>  }
>  
> +struct amf_node *amf_node_find_by_hostname (const char *hostname) 
> +{
> +	struct amf_node *node;
> +
> +	assert (hostname != NULL && amf_cluster != NULL);
> +
> +	for (node = amf_cluster->node_head; node != NULL; node = node->next) {
> +		if (strcmp ((char*)node->saAmfNodeClmNode.value, hostname) == 0) {
> +			return node;
> +		}
> +	}
> +
> +	dprintf ("node %s not found in configuration!", hostname);
> +
> +	return NULL;
> +}
> +
>  static int all_applications_on_node_started (struct amf_node *node, 
>  	struct amf_cluster *cluster) 
>  {
> Index: exec/amfutil.c
> ===================================================================
> --- exec/amfutil.c	(revision 1232)
> +++ exec/amfutil.c	(working copy)
> @@ -62,11 +62,16 @@
>  #include "print.h"
>  #include "aispoll.h"
>  #include "main.h"
> +#include "service.h"
>  
>  #ifndef OPENAIS_CLUSTER_STARTUP_TIMEOUT
>  #define OPENAIS_CLUSTER_STARTUP_TIMEOUT 5000
>  #endif
>  
> +struct req_exec_amf_msg {
> +	mar_req_header_t header;
> +};
> +
>  static const char *presence_state_text[] = {
>  	"UNKNOWN",
>  	"UNINSTANTIATED",
> @@ -346,10 +351,12 @@
>  			break;
>  
>  		case AMF_NODE:
> -			if ((loc = strstr_rs (line, "saAmfNodeSuFailOverProb")) != 0) {
> +			if ((loc = strstr_rs (line, "saAmfNodeSuFailOverProb=")) != 0) {
>  				node->saAmfNodeSuFailOverProb = atol(loc);
> -			} else if ((loc = strstr_rs (line, "saAmfNodeSuFailoverMax")) != 0) {
> +			} else if ((loc = strstr_rs (line, "saAmfNodeSuFailoverMax=")) != 0) {
>  				node->saAmfNodeSuFailoverMax = atol(loc);
> +			} else if ((loc = strstr_rs (line, "saAmfNodeClmNode=")) != 0) {
> +				setSaNameT (&node->saAmfNodeClmNode, trim_str (loc));
>  			} else if ((loc = strstr_rs (line, "saAmfNodeAutoRepair=")) != 0) {
>  				if (strcmp (loc, "true") == 0) {
>  					node->saAmfNodeAutoRepair = SA_TRUE;
> @@ -383,6 +390,10 @@
>  					error_reason = "saAmfNodeSuFailoverMax missing";
>  					goto parse_error;
>  				}
> +				if (node->saAmfNodeClmNode.length == 0) {
> +					error_reason = "saAmfNodeClmNode missing";
> +					goto parse_error;
> +				}
>  				current_parse = AMF_CLUSTER;
>  			} else {
>  				goto parse_error;
> @@ -902,12 +913,16 @@
>  	log_printf (LOG_INFO, "safCluster=%s", getSaNameT(&cluster->name));
>  	log_printf (LOG_INFO, "  admin state: %s\n",
>  		admin_state_text[cluster->saAmfClusterAdminState]);
> +	log_printf (LOG_INFO, "  state:       %u\n", cluster->state);
>  	for (node = cluster->node_head; node != NULL; node = node->next) {
>  		log_printf (LOG_INFO, "  safNode=%s\n", getSaNameT (&node->name));
> +		log_printf (LOG_INFO, "    CLM Node:    %s\n", getSaNameT (&node->saAmfNodeClmNode));
> +		log_printf (LOG_INFO, "    node ID:     %u\n", node->nodeid);
>  		log_printf (LOG_INFO, "    admin state: %s\n",
>  			admin_state_text[node->saAmfNodeAdminState]);
>  		log_printf (LOG_INFO, "    oper state:  %s\n",
>  			oper_state_text[node->saAmfNodeOperState]);
> +		log_printf (LOG_INFO, "    acsm state:  %u\n", node->acsm_state);
>  	}
>  	for (app = cluster->application_head; app != NULL; app = app->next) {
>  		log_printf (LOG_INFO, "  safApp=%s\n", getSaNameT(&app->name));
> @@ -916,6 +931,8 @@
>  		log_printf (LOG_INFO, "    num_sg:      %d\n", app->saAmfApplicationCurrNumSG);
>  		for (sg = app->sg_head; sg != NULL; sg = sg->next) {
>  			log_printf (LOG_INFO, "    safSg=%s\n", getSaNameT(&sg->name));
> +			log_printf (LOG_INFO, "      avail_state:        %u\n",
> +				sg->avail_state);
>  			log_printf (LOG_INFO, "      admin state:        %s\n",
>  				admin_state_text[sg->saAmfSGAdminState]);
>  			log_printf (LOG_INFO, "      assigned SUs        %d\n",
> @@ -1272,4 +1289,40 @@
>  
>  }
>  
> +void amf_msg_mcast (int id, void *buf, size_t len)
> +{
> +	struct req_exec_amf_msg msg;
> +	struct iovec iov[2];
> +	int iov_cnt;
> +	int res;
>  
> +//	ENTER ("%u, %p, %u", id, buf, len);
> +
> +	msg.header.size = sizeof (msg);
> +	msg.header.id = SERVICE_ID_MAKE (AMF_SERVICE, id);
> +	iov[0].iov_base = &msg;
> +	iov[0].iov_len  = sizeof (msg);
> +
> +	if (buf == NULL) {
> +		msg.header.size = sizeof (msg);
> +		iov_cnt = 1;
> +	} else {
> +		msg.header.size = sizeof (msg) + len;
> +		iov[1].iov_base = buf;
> +		iov[1].iov_len  = len;
> +		iov_cnt = 2;
> +	}
> +
> +	res = totempg_groups_mcast_joined (
> +		openais_group_handle, iov, iov_cnt, TOTEMPG_AGREED);
> +
> +	if (res != 0) {
> +		dprintf("Unable to send %d bytes\n", msg.header.size);
> +		openais_exit_error (AIS_DONE_FATAL_ERR);
> +	}
> +}
> +
> +void amf_util_init (void)
> +{
> +	log_init ("AMF");
> +}
> Index: exec/amf.c
> ===================================================================
> --- exec/amf.c	(revision 1233)
> +++ exec/amf.c	(working copy)
> @@ -71,10 +71,12 @@
>   * IDLE                    node_joined                    PROBING-1
>   * PROBING-1               timer1 timeout           A1    PROBING-2
>   * PROBING-1               SYNC_START               A2    UPDATING_CLUSTER_MODEL
> + * PROBING-1               node_joined              A7    PROBING-1
>   * PROBING-2               SYNC_START[From me]            CREATING_CLUSTER_MODEL
>   * PROBING-2               SYNC_START[From other]         UPDATING_CLUSTER_MODEL
> - * CREATING_CLUSTER_MODEL  Model created                  SYNCHRONIZING
> - * SYNCHRONIZING           SYNC_READY                     NORMAL_OPERATION
> + * PROBING-2               node_joined              A7    PROBING-2
> + * CREATING_CLUSTER_MODEL  Model created            A8    SYNCHRONIZING
> + * SYNCHRONIZING           SYNC_READY               A10   NORMAL_OPERATION
>   * SYNCHRONIZING           node_left[sync_master]   A5    SYNCHRONIZING
>   * SYNCHRONIZING           node_joined                    SYNCHRONIZING
>   * UPDATING_CLUSTER_MODEL  SYNC_DATA                A3    UPDATING_CLUSTER_MODEL
> @@ -82,7 +84,9 @@
>   * UPDATING_CLUSTER_MODEL  SYNC_START               A5    NORMAL_OPERATION
>   * UPDATING_CLUSTER_MODEL  node_left[sync_master]         PROBING-1
>   * NORMAL_OPERATION        node_joined                    SYNCHRONIZING
> - * NORMAL_OPERATION        node_left[sync_master] A6      NORMAL_OPERATION
> + * NORMAL_OPERATION        node_left[sync_master]   A6    NORMAL_OPERATION
> + * NORMAL_OPERATION        SYNC_REQUEST             A8    NORMAL_OPERATION
> + * Any                     SYNC_REQUEST             A9    No change
>   *
>   * 1.2 State Description
>   * =====================
> @@ -105,7 +109,10 @@
>   * A4 - Create cluster model
>   * A5 - Free received SYNC_DATA
>   * A6 - Calculate new sync master
> - * 
> + * A7 - Multicast SYNC_REQUEST message
> + * A8 - Update AMF node object(s) with CLM nodeid
> + * A9 - Save CLM nodeid & hostname
> + * A10- Delete CLM nodes
>   */
>  
>  #include <sys/types.h>
> @@ -144,10 +151,6 @@
>  #include "objdb.h"
>  #include "print.h"
>  
> -#define LOG_LEVEL_FROM_LIB LOG_LEVEL_DEBUG
> -#define LOG_LEVEL_FROM_GMI LOG_LEVEL_DEBUG
> -#define LOG_LEVEL_ENTER_FUNC LOG_LEVEL_DEBUG
> -
>  #ifndef HOST_NAME_MAX
>  # define HOST_NAME_MAX 255
>  #endif
> @@ -206,6 +209,8 @@
>  	void *message, unsigned int nodeid);
>  static void message_handler_req_exec_amf_cluster_start_tmo (
>  	void *message, unsigned int nodeid);
> +static void message_handler_req_exec_amf_sync_request (
> +	void *message, unsigned int nodeid);
>  static void amf_dump_fn (void);
>  static void amf_sync_init (void);
>  static int amf_sync_process (void);
> @@ -338,7 +343,10 @@
>  	},
>  	{
>  		.exec_handler_fn = message_handler_req_exec_amf_cluster_start_tmo,
> -	}
> +	},
> +	{
> +		.exec_handler_fn = message_handler_req_exec_amf_sync_request,
> +	},
>  };
>  
>  /*
> @@ -429,6 +437,11 @@
>  	amf_object_type_t object_type;
>  };
>  
> +struct req_exec_amf_sync_request {
> +	mar_req_header_t header;
> +	char hostname[HOST_NAME_MAX + 1];
> +};
> +
>  static const char *scsm_state_names[] = {
>  	"Unknown",
>  	"IDLE",
> @@ -463,7 +476,6 @@
>  	/* node ID of current sync master */
>  	unsigned int               sync_master;
>  
> -	unsigned int              *member_list;
>  	unsigned int              *joined_list;
>  	unsigned int               joined_list_entries;
>  	struct amf_cluster        *cluster;
> @@ -491,34 +503,82 @@
>   */
>  static struct scsm_descriptor scsm;
>  
> -/* IMPL */
> -static char *hostname_get (unsigned int nodeid)
> +typedef struct clm_node {
> +	unsigned int nodeid;
> +	char hostname[HOST_NAME_MAX + 1];
> +	struct clm_node *next;
> +} clm_node_t;
> +
> +static char hostname[HOST_NAME_MAX + 1];
> +
> +/*
> + * Nodes in the cluster, only used for initial start
> + * since before the AMF node object exist, we don't
> + * have storage for the information received in
> + * SYNC_REQUEST msg.
> + */
> +static clm_node_t *clm_nodes;
> +
> +/******************************************************************************
> + * Internal (static) utility functions
> + *****************************************************************************/
> +
> +/**
> + * Find a CLM node object using nodeid as query. Allocate and
> + * return new object if not found.
> + * 
> + * @param nodeid
> + * 
> + * @return clm_node_t*
> + */
> +static clm_node_t *clm_node_find_by_nodeid (unsigned int nodeid)
>  {
> -        struct totem_ip_address interfaces[INTERFACE_MAX];
> -        char **status;
> -        unsigned int iface_count;
> -        int res;
> -        struct hostent *ent;
> -                                                                                                                   
> -        res = totempg_ifaces_get (nodeid, interfaces, &status, &iface_count);
> -        if (res == -1) {
> -                log_printf (LOG_LEVEL_ERROR, "totempg_ifaces_get failed for %u", nodeid);
> -                openais_exit_error (AIS_DONE_FATAL_ERR);
> -        }
> -        if (iface_count > 0) {
> -                ent = gethostbyaddr (interfaces[0].addr, 4, interfaces[0].family);
> -                if (ent == NULL) {
> -                        log_printf (LOG_LEVEL_ERROR, "gethostbyaddr failed: %d\n", h_errno);
> -                        openais_exit_error (AIS_DONE_FATAL_ERR);
> -                }
> -                                                                                                                   
> -                return ent->h_name;
> -        }
> -                                                                                                                   
> -        return NULL;
> +	clm_node_t *clm_node;
> +
> +	for (clm_node = clm_nodes; clm_node != NULL; clm_node = clm_node->next) {
> +		if (clm_node->nodeid == nodeid) {
> +			return clm_node;
> +		}
> +	}
> +
> +	clm_node = amf_malloc (sizeof (clm_node_t));
> +	clm_node->nodeid = nodeid;
> +	clm_node->next = clm_nodes;
> +	clm_nodes = clm_node;
> +
> +	return clm_node;
>  }
>  
>  /**
> + * Init nodeids in the AMF node objects using information in the
> + * CLM node objects.
> + */
> +static void nodeids_init (void)
> +{
> +	amf_node_t *amf_node;
> +	clm_node_t *clm_node;
> +
> +	ENTER ("");
> +
> +	for (clm_node = clm_nodes; clm_node != NULL; clm_node = clm_node->next) {
> +        /*
> +         * Iterate all AMF nodes if several AMF nodes are mapped to this
> +         * particular CLM node.* 
> +		*/
> +		for (amf_node = amf_cluster->node_head; amf_node != NULL;
> +			  amf_node = amf_node->next) {
> +
> +			if (strcmp ((char*)amf_node->saAmfNodeClmNode.value,
> +				clm_node->hostname) == 0) {
> +
> +				dprintf ("%s id set to %u", amf_node->name.value, clm_node->nodeid);
> +				amf_node->nodeid = clm_node->nodeid;
> +			}
> +		}
> +	}
> +}
> +
> +/**
>   * Return pointer to this node object.
>   * 
>   * @param cluster
> @@ -527,16 +587,16 @@
>   */
>  static struct amf_node *get_this_node_obj (struct amf_cluster *cluster)
>  {
> -	SaClmClusterNodeT *clm_node = main_clm_get_by_nodeid (SA_CLM_LOCAL_NODE_ID);
> -	char *hostname;
> -	SaNameT name;
> +	char hostname[HOST_NAME_MAX + 1];
>  
> -	assert (clm_node != NULL);
> -	hostname = hostname_get (clm_node->nodeId);
> -	assert (hostname != NULL);
> -	setSaNameT (&name, hostname);
> +	assert (cluster != NULL);
>  
> -	return amf_node_find (&name);
> +	if (gethostname (hostname, sizeof(hostname)) == -1) {
> +		log_printf (LOG_LEVEL_ERROR, "gethostname failed: %d", errno);
> +		openais_exit_error (AIS_DONE_FATAL_ERR);
> +	}
> +
> +	return amf_node_find_by_hostname (hostname);
>  }
>  
>  /**
> @@ -551,37 +611,6 @@
>  }
>  
>  /**
> - * Multicast SYNC_START message
> - * 
> - * @return int
> - */
> -static int mcast_sync_start (void)
> -{
> -	struct req_exec_amf_sync_data req_exec;
> -	struct iovec iov[1];
> -	int res;
> -
> -	SYNCTRACE ("state %s", scsm_state_names[scsm.state]);
> -
> -	req_exec.header.size = sizeof (struct req_exec_amf_sync_data);
> -	req_exec.header.id =
> -		SERVICE_ID_MAKE (AMF_SERVICE, MESSAGE_REQ_EXEC_AMF_SYNC_START);
> -
> -	iov[0].iov_base = &req_exec;
> -	iov[0].iov_len  = sizeof (struct req_exec_amf_sync_data);
> -
> -	res = totempg_groups_mcast_joined (
> -		openais_group_handle, iov, 1, TOTEMPG_AGREED);
> -
> -	if (res != 0) {
> -		dprintf("Unable to send %d bytes of sync data\n", req_exec.header.size);
> -		openais_exit_error (AIS_DONE_FATAL_ERR);
> -	}
> -
> -	return res;
> -}
> -
> -/**
>   * Multicast SYNC_DATA message containing a model object.
>   * 
>   * @param buf
> @@ -620,34 +649,6 @@
>  }
>  
>  /**
> - * Multicast SYNC_READY message
> - */
> -static void mcast_sync_ready (void)
> -{
> -	struct req_exec_amf_sync_data req_exec;
> -	struct iovec iov[1];
> -	int res;
> -
> -	SYNCTRACE ("state %s", scsm_state_names[scsm.state]);
> -	
> -	
> -	req_exec.header.size = sizeof (struct req_exec_amf_sync_data);
> -	req_exec.header.id =
> -		SERVICE_ID_MAKE (AMF_SERVICE, MESSAGE_REQ_EXEC_AMF_SYNC_READY);
> -
> -	iov[0].iov_base = &req_exec;
> -	iov[0].iov_len  = sizeof (struct req_exec_amf_sync_data);
> -
> -	res = totempg_groups_mcast_joined (
> -		openais_group_handle, iov, 1, TOTEMPG_AGREED);
> -
> -	if (res != 0) {
> -		dprintf("Unable to send %d bytes of sync data\n", req_exec.header.size);
> -		openais_exit_error (AIS_DONE_FATAL_ERR);
> -	}
> -}
> -
> -/**
>   * Timer callback function. The time waiting for external
>   * synchronisation has expired, start competing with other
>   * nodes to determine who should read config file.
> @@ -656,9 +657,7 @@
>  static void timer_function_scsm_timer1_tmo (void *data)
>  {
>  	SYNCTRACE ("");
> -	if (mcast_sync_start () != 0) {
> -		openais_exit_error (AIS_DONE_FATAL_ERR);
> -	}
> +	amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_SYNC_START, NULL, 0);
>  	sync_state_set (PROBING_2);
>  }
>  
> @@ -686,7 +685,7 @@
>  		}
>  	} while (res != 0);
>  
> -	mcast_sync_ready ();
> +	amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_SYNC_READY, NULL, 0);
>  }
>  
>  /**
> @@ -708,10 +707,12 @@
>  
>  	if (this_amf_node == NULL) {
>  		log_printf (LOG_LEVEL_INFO,
> -			"This node is not configured as an AMF node, disabling.");
> +			"Info: This node is not configured as an AMF node, disabling.");
>  		return -1;
>  	}
>  
> +	this_amf_node->nodeid = this_ip->nodeid;
> +
>  	return 0;
>  }
>  
> @@ -1120,46 +1121,26 @@
>  	return 0;
>  }
>  
> +/**
> + * Start the AMF nodes that has joined
> + */
>  static void joined_nodes_start (void)
>  {
>  	int i;
> +	struct amf_node *node;
>  
>  	for (i = 0; i < scsm.joined_list_entries; i++) {
> -		SaNameT name;
> -		struct amf_node *node;
> +		node = amf_node_find_by_nodeid (scsm.joined_list[i]);
>  
> -		setSaNameT (&name, hostname_get (scsm.joined_list[i]));
> -		node = amf_node_find (&name);
>  		if (node != NULL) {
> -			node->nodeid = scsm.joined_list[i];
>  			amf_node_sync_ready (node);
>  		} else {
>  			log_printf (LOG_LEVEL_INFO,
> -				"Node %s is not configured as an AMF node", name.value);
> +				"Info: Node %u is not configured as an AMF node", scsm.joined_list[i]);
>  		}
>  	}
>  }
>  
> -static void init_nodeids (void)
> -{
> -	int i;
> -
> -	ENTER ("");
> -
> -	for (i = 0; scsm.member_list[i] != 0; i++) {
> -		SaNameT name;
> -		struct amf_node *node;
> -
> -		setSaNameT (&name, hostname_get (scsm.member_list[i]));
> -		node = amf_node_find (&name);
> -
> -		assert (node != NULL);
> -		node->nodeid = scsm.member_list[i];
> -	}
> -
> -	LEAVE ("");
> -}
> -
>  /******************************************************************************
>   * AMF Framework callback implementation                       *
>   *****************************************************************************/
> @@ -1167,6 +1148,7 @@
>  static void amf_sync_init (void)
>  {
>  	SYNCTRACE ("state %s", scsm_state_names[scsm.state]);
> +
>  	switch (scsm.state) {
>  		case UNCONFIGURED:
>  		case PROBING_1:
> @@ -1184,8 +1166,9 @@
>  	}
>  
>  	if (scsm.state == SYNCHRONIZING && scsm.sync_master == this_ip->nodeid) {
> -		mcast_sync_start ();
> +		amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_SYNC_START, NULL, 0);
>  		assert (amf_cluster != NULL);
> +		nodeids_init ();
>  		scsm.cluster = amf_cluster;
>  		scsm.node = amf_cluster->node_head;
>  		scsm.app = amf_cluster->application_head;
> @@ -1271,23 +1254,32 @@
>  }
>  
>  /**
> - * SCSM normal exit function for state SYNCHRONIZING
> - * All synced objects are now commited, start node/cluster.
> + * SCSM normal exit function for states SYNCHRONIZING &
> + * UPDATING_CLUSTER_MODEL. All synced objects are now
> + * commited, start node/cluster.
>   */
>  static void amf_sync_activate (void)
>  {
> +	clm_node_t *clm_node = clm_nodes;
> +
>  	SYNCTRACE ("state %s", scsm_state_names[scsm.state]);
>  
>  	switch (scsm.state) {
>  		case SYNCHRONIZING:
> +			/* Delete all CLM nodes, not needed any longer. */
> +			while (clm_node != NULL) {
> +				clm_node_t *tmp = clm_node;
> +				clm_node = clm_node->next;
> +				free (tmp);
> +			}
> +			clm_nodes = NULL;
>  			sync_state_set (NORMAL_OPERATION);
> -			init_nodeids ();
>  			/* TODO: Remove dependencies to amf_cluster->state */
>  			switch (amf_cluster->state) {
> -				case CLUSTER_STARTED: {
> +				case CLUSTER_STARTED:
> +				case CLUSTER_STARTING_WORKLOAD:
>  					joined_nodes_start ();
>  					break;
> -				}
>  				case CLUSTER_STARTING_COMPONENTS: {
>  					amf_cluster_sync_ready (amf_cluster);
>  					joined_nodes_start ();
> @@ -1305,14 +1297,15 @@
>  			scsm.cluster = NULL;
>  			this_amf_node = get_this_node_obj (amf_cluster);
>  			sync_state_set (NORMAL_OPERATION);
> -			init_nodeids ();
>  			if (this_amf_node != NULL) {
> +				this_amf_node->nodeid = this_ip->nodeid;
>  #ifdef AMF_DEBUG
>  				amf_runtime_attributes_print (amf_cluster);
>  #endif
>  				/* TODO: Remove dependencies to amf_cluster->state */
>  				switch (amf_cluster->state) {
>  					case CLUSTER_STARTED: {
> +					case CLUSTER_STARTING_WORKLOAD:
>  						amf_node_sync_ready (this_amf_node); 
>  						break;
>  					}
> @@ -1328,7 +1321,7 @@
>  				}
>  			} else {
>  				log_printf (LOG_LEVEL_INFO,
> -					"This node is not configured as an AMF node, disabling.");
> +					"Info: This node is not configured as an AMF node, disabling.");
>  				sync_state_set (UNCONFIGURED);
>  			}
>  			break;
> @@ -1341,7 +1334,7 @@
>  			assert (0);
>  	}
>  
> -	LEAVE_VOID ();
> +	SYNCTRACE ("");
>  }
>  
>  /**
> @@ -1355,6 +1348,11 @@
>  {
>  	log_init ("AMF");
>  
> +	if (gethostname (hostname, sizeof (hostname)) == -1) {
> +		log_printf (LOG_LEVEL_ERROR, "gethostname failed: %d", errno);
> +		openais_exit_error (AIS_DONE_FATAL_ERR);
> +	}
> +
>  	if (!amf_enabled (objdb)) {
>  		sync_state_set (UNCONFIGURED);
>  		return 0;
> @@ -1369,6 +1367,7 @@
>  	amf_su_init();
>  	amf_comp_init();
>  	amf_si_init();
> +	amf_util_init ();
>  
>  	return (0);
>  }
> @@ -1391,34 +1390,21 @@
>  	unsigned int *joined_list, int joined_list_entries,
>  	struct memb_ring_id *ring_id)
>  {
> -	unsigned int i;
> -
>  	ENTER ("mnum: %d, jnum: %d, lnum: %d, sync state: %s, ring ID %llu rep %s\n",
>  		member_list_entries, joined_list_entries, left_list_entries,
>  		scsm_state_names[scsm.state], ring_id->seq, totemip_print (&ring_id->rep));
>  
> +	/*
> +	* Save nodes that joined, needed to initialize each
> +	* node's totem node id later.
> +	 */
>  	scsm.joined_list_entries = joined_list_entries;
>  	if (scsm.joined_list != NULL) {
>  		free (scsm.joined_list);
>  	}
>  	scsm.joined_list = amf_malloc (joined_list_entries * sizeof (unsigned int));
> -	for (i = 0; i < joined_list_entries; i++) {
> -		scsm.joined_list[i] = joined_list[i];
> -	}
> +	memcpy (scsm.joined_list, joined_list, sizeof (unsigned int) * joined_list_entries);
>  
> -	/**
> -     * Save current members of the cluster, needed to initialize
> -     * each node's totem node id later.
> -     */
> -	if (scsm.member_list != NULL) {
> -		free (scsm.member_list);
> -	}
> -	scsm.member_list = amf_malloc ((member_list_entries + 1) * sizeof (unsigned int));
> -	for (i = 0; i < member_list_entries; i++) {
> -		scsm.member_list[i] = member_list[i];
> -	}
> -	scsm.member_list[i] = 0;
> -
>  	switch (scsm.state) {
>  		case IDLE: {
>  			sync_state_set (PROBING_1);
> @@ -1432,7 +1418,11 @@
>  		case PROBING_1:
>  			/* fall-through */
>  		case PROBING_2:
> -			/* fall-through */
> +			if (joined_list_entries > 0) {
> +				amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_SYNC_REQUEST,
> +					hostname, strlen (hostname) + 1);
> +			}
> +			break;
>  		case UNCONFIGURED:
>  			break;
>  		case UPDATING_CLUSTER_MODEL:
> @@ -1453,11 +1443,11 @@
>  		case SYNCHRONIZING: {
>  			if (joined_list_entries > 0 && scsm.sync_master == this_ip->nodeid) {
>  				/* restart sync */
> -				mcast_sync_start ();
> +				amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_SYNC_START, NULL, 0);
>  			}
> -            /** If the sync master left the cluster, calculate a new sync
> -             *  master between the remaining nodes in the cluster excluding
> -             *  the nodes we are just syncing.
> +			/* If the sync master left the cluster, calculate a new sync
> +			*  master between the remaining nodes in the cluster excluding
> +			*  the nodes we are just syncing.
>  			 */
>  			if (!is_member (scsm.sync_master, member_list, member_list_entries)) {
>  				scsm.sync_master =
> @@ -1468,14 +1458,14 @@
>  				if (scsm.sync_master == this_ip->nodeid) {
>  					/* restart sync */
>  					SYNCTRACE ("I am (new) sync master");
> -					mcast_sync_start ();
> +					amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_SYNC_START, NULL, 0);
>  				}
>  			}
>  			break;
>  		}
>  		case NORMAL_OPERATION: {
> -            /** If the sync master left the cluster, calculate a new sync
> -             *  master between the remaining nodes in the cluster.
> +			/* If the sync master left the cluster, calculate a new sync
> +			*  master between the remaining nodes in the cluster.
>  			 */
>  			if (!is_member (scsm.sync_master, member_list, member_list_entries)) {
>  				scsm.sync_master =
> @@ -1493,8 +1483,9 @@
>  
>  				for (i = 0; i < left_list_entries; i++) {
>  					node = amf_node_find_by_nodeid (left_list[i]);
> -					assert (node != NULL);
> -					amf_node_leave(node);
> +					if (node != NULL) {
> +						amf_node_leave(node);
> +					}
>  				}
>  			}
>  			break;
> @@ -1552,7 +1543,7 @@
>  
>  	comp = amf_comp_find (amf_cluster, &req_exec->compName);
>  	assert (comp != NULL);
> -	ENTER ("'%s'", comp->name.value);
> +	TRACE1 ("ComponentRegister: '%s'", comp->name.value);
>  	error = amf_comp_register (comp);
>  
>  	if (amf_su_is_local (comp->su)) {
> @@ -1591,7 +1582,7 @@
>  
>  	comp = amf_comp_find (amf_cluster, &req_exec->compName);
>  	if (comp == NULL) {
> -		log_printf (LOG_ERR, "'%s' not found", req_exec->compName.value);
> +		log_printf (LOG_ERR, "Error: '%s' not found", req_exec->compName.value);
>  		return;
>  	}
>  
> @@ -1611,7 +1602,7 @@
>  
>  	comp = amf_comp_find (amf_cluster, &req_exec->compName);
>  	if (comp == NULL) {
> -		log_printf (LOG_ERR, "'%s' not found", req_exec->compName.value);
> +		log_printf (LOG_ERR, "Error: '%s' not found", req_exec->compName.value);
>  		return;
>  	}
>  
> @@ -1634,7 +1625,7 @@
>  		return;
>  	}
>  
> -	ENTER ("%s", req_exec->dn.value);
> +	TRACE1 ("AmfResponse: %s", req_exec->dn.value);
>  
>  	comp = amf_comp_response_2 (
>  		req_exec->interface, &req_exec->dn, req_exec->error, &retval);
> @@ -1834,6 +1825,35 @@
>  	}
>  }
>  
> +static void message_handler_req_exec_amf_sync_request (
> +	void *message, unsigned int nodeid)
> +{
> +	struct req_exec_amf_sync_request *req_exec = message;
> +	clm_node_t *clm_node;
> +
> +	SYNCTRACE ("from: %s, name: %s, state %s", totempg_ifaces_print (nodeid),
> +		req_exec->hostname, scsm_state_names[scsm.state]);
> +
> +	clm_node = clm_node_find_by_nodeid (nodeid);
> +	assert (clm_node != NULL);
> +	strcpy (clm_node->hostname, req_exec->hostname);
> +
> +	if (scsm.state == NORMAL_OPERATION) {
> +		amf_node_t *amf_node = amf_cluster->node_head;
> +		/*
> +		 * Iterate all AMF nodes if several AMF nodes are mapped to this
> +         * particular CLM node.
> +		*/
> +		for (; amf_node != NULL; amf_node = amf_node->next) {
> +			if (strcmp ((char*)amf_node->saAmfNodeClmNode.value,
> +				req_exec->hostname) == 0) {
> +
> +				amf_node->nodeid = nodeid;
> +			}
> +		}
> +	}
> +}
> +
>  /*****************************************************************************
>   * Library Interface Implementation
>   ****************************************************************************/
> @@ -1853,7 +1873,7 @@
>  		struct iovec iovec;
>  		struct amf_pd *amf_pd = openais_conn_private_data_get (conn);
>  
> -		TRACE2("Lib comp register '%s'", req_lib->compName.value);
> +		TRACE2("Comp register '%s'", req_lib->compName.value);
>  		comp->conn = conn;
>  		amf_pd->comp = comp;
>  		req_exec.header.size = sizeof (struct req_exec_amf_comp_register);
> @@ -1868,7 +1888,7 @@
>  			&iovec, 1, TOTEMPG_AGREED) == 0);
>  	} else {
>  		struct res_lib_amf_componentregister res_lib;
> -		log_printf (LOG_ERR, "Lib comp register: comp '%s' not found", req_lib->compName.value);
> +		log_printf (LOG_ERR, "Error: Comp register: '%s' not found", req_lib->compName.value);
>  		res_lib.header.id = MESSAGE_RES_AMF_COMPONENTREGISTER;
>  		res_lib.header.size = sizeof (struct res_lib_amf_componentregister);
>  		res_lib.header.error = SA_AIS_ERR_INVALID_PARAM;
> @@ -1931,8 +1951,6 @@
>  	struct amf_comp *comp;
>  	SaAisErrorT error = SA_AIS_OK;
>  
> -	assert (scsm.state == NORMAL_OPERATION);
> -
>  	comp = amf_comp_find (amf_cluster, &req_lib->compName);
>  
>  	if (comp != NULL) {
> @@ -1961,8 +1979,6 @@
>  	struct amf_comp *comp;
>  	SaAisErrorT error = SA_AIS_OK;
>  
> -	assert (scsm.state == NORMAL_OPERATION);
> -
>  	comp = amf_comp_find (amf_cluster, &req_lib->compName);
>  	if (comp != NULL) {
>  		error = amf_comp_healthcheck_confirm (
> @@ -1987,8 +2003,6 @@
>  	struct amf_comp *comp;
>  	SaAisErrorT error = SA_AIS_OK;
>  
> -	assert (scsm.state == NORMAL_OPERATION);
> -
>  	comp = amf_comp_find (amf_cluster, &req_lib->compName);
>  	if (comp != NULL) {
>  		error = amf_comp_healthcheck_stop (comp, &req_lib->healthcheckKey);
> @@ -2012,8 +2026,6 @@
>  	SaAmfHAStateT ha_state;
>  	SaAisErrorT error;
>  
> -	assert (scsm.state == NORMAL_OPERATION);
> -
>  	comp = amf_comp_find (amf_cluster, &req_lib->compName);
>  	if (comp != NULL) {
>  		error = amf_comp_hastate_get (comp, &req_lib->csiName, &ha_state);
> @@ -2305,6 +2317,8 @@
>  	res_lib.header.size = sizeof (struct res_lib_amf_response);
>  	res_lib.header.error = retval;
>  
> +//	ENTER ("");
> +
>  	if (openais_conn_send_response (conn, &res_lib, sizeof (res_lib)) != 0) {
>  		openais_exit_error (AIS_DONE_FATAL_ERR);
>  	}
> Index: exec/amf.h
> ===================================================================
> --- exec/amf.h	(revision 1232)
> +++ exec/amf.h	(working copy)
> @@ -172,7 +172,7 @@
>  	enum cluster_states state;
>  };
>  
> -struct amf_node {
> +typedef struct amf_node {
>  	/* Configuration Attributes */
>  	SaNameT name;
>  	SaNameT saAmfNodeClmNode;
> @@ -193,8 +193,7 @@
>  	unsigned int nodeid;
>  	struct amf_node *next;
>  	amf_node_acsm_state_t acsm_state;
> -	int synchronized;
> -};
> +} amf_node_t;
>  
>  struct amf_application {
>  	/* Configuration Attributes */
> @@ -501,7 +500,8 @@
>  	MESSAGE_REQ_EXEC_AMF_SYNC_START = 5,
>  	MESSAGE_REQ_EXEC_AMF_SYNC_DATA = 6,
>  	MESSAGE_REQ_EXEC_AMF_SYNC_READY = 7,
> -	MESSAGE_REQ_EXEC_AMF_CLUSTER_START_TMO = 8
> +	MESSAGE_REQ_EXEC_AMF_CLUSTER_START_TMO = 8,
> +	MESSAGE_REQ_EXEC_AMF_SYNC_REQUEST = 9
>  };
>  
>  struct req_exec_amf_clc_cleanup_completed {
> @@ -535,7 +535,6 @@
>  extern const char *amf_ha_state (int state);
>  extern const char *amf_readiness_state (int state);
>  extern const char *amf_assignment_state (int state);
> -extern struct amf_node *amf_node_find_by_nodeid (unsigned int nodeid);
>  extern char *amf_serialize_SaNameT (
>  	char *buf, int *size, int *offset, SaNameT *name);
>  extern char *amf_serialize_SaStringT (
> @@ -550,6 +549,8 @@
>  extern char *amf_deserialize_SaUint32T (char *buf, SaUint32T *num);
>  extern char *amf_deserialize_SaUint64T (char *buf, SaUint64T *num);
>  extern char *amf_deserialize_opaque (char *buf, char *dst, int *cnt);
> +extern void amf_msg_mcast (int id, void *buf, size_t len);
> +extern void amf_util_init (void);
>  
>  /*===========================================================================*/
>  /* amfnode.c */
> @@ -561,6 +562,8 @@
>  extern struct amf_node *amf_node_deserialize (
>  	struct amf_cluster *cluster, char *buf, int size);
>  extern struct amf_node *amf_node_find (SaNameT *name);
> +extern struct amf_node *amf_node_find_by_nodeid (unsigned int nodeid);
> +extern struct amf_node *amf_node_find_by_hostname (const char *hostname);
>  
>  /* Event methods */
>  extern void amf_node_sync_ready (struct amf_node *node);
> Index: conf/amf.conf
> ===================================================================
> --- conf/amf.conf	(revision 1229)
> +++ conf/amf.conf	(working copy)
> @@ -7,10 +7,16 @@
>  
>  safAmfCluster = TEST_CLUSTER {
>  	saAmfClusterStartupTimeout=3000
> -	safAmfNode = seasc0036 {
> +	safAmfNode = AMF1 {
>  		saAmfNodeSuFailOverProb=2000
>  		saAmfNodeSuFailoverMax=2
> +		saAmfNodeClmNode=p01
>  	}
> +	safAmfNode = AMF2 {
> +		saAmfNodeSuFailOverProb=2000
> +		saAmfNodeSuFailoverMax=2
> +		saAmfNodeClmNode=p02
> +	}
>  	safApp = APP-1 {
>  		safSg = RAID {
>  			saAmfSGRedundancyModel=nplusm	
> @@ -24,7 +30,7 @@
>  			saAmfSGSuRestartMax=1
>  			saAmfSGAutoAdjustProb=5000
>  			safSu = SERVICE_X_1 {
> -				saAmfSUHostedByNode=seasc0036
> +				saAmfSUHostedByNode=AMF1
>  				saAmfSUNumComponents=1
>  				safComp = A {
>  					saAmfCompCategory=sa_aware
> @@ -81,7 +87,7 @@
>  			}
>  			safSu = SERVICE_X_2 {
>  				clccli_path=/tmp/aistest
> -				saAmfSUHostedByNode=seasc0036
> +				saAmfSUHostedByNode=AMF2
>  				saAmfSUNumComponents=1
>  				safComp = A {
>  					saAmfCompCategory=sa_aware
> 
> 
> ------------------------------------------------------------------------
> 
> _______________________________________________
> Openais mailing list
> Openais at lists.osdl.org
> https://lists.osdl.org/mailman/listinfo/openais




More information about the Openais mailing list