[Openais] Patch AMF sync #4
Hans Feldt
Hans.Feldt at ericsson.com
Tue Sep 5 00:35:39 PDT 2006
Committed revision 1236.
Hans Feldt wrote:
> This patch includes:
>
> - "No need for DNS or /etc/hosts"
>
> The call to gethostbyaddr() has been removed. This has been replaced by
> a protocol where each node multicasts its hostname (obtained with
> gethostname()).
>
> - "Logical AMF nodes"
>
> The AMF node name is no longer a hostname. The saAmfNodeClmNode
> configuration attribute of the AMF node is now the hostname. This config
> attribute is now mandatory. The change to the amf.conf file shows the required
> changes.
>
> - Some other AMF sync bug fixes
>
>
> This patch is probably in conflict with the one sent by Lars earlier. We
> will sort that out before commit.
>
> FYI, coming up is an amf.conf man page.
>
> Regards,
> Hans
>
>
> ------------------------------------------------------------------------
>
> Index: exec/amfnode.c
> ===================================================================
> --- exec/amfnode.c (revision 1232)
> +++ exec/amfnode.c (working copy)
> @@ -119,8 +119,8 @@
> static void amf_node_acsm_enter_leaving_spontaneously(struct amf_node *node)
> {
> ENTER("'%s'", node->name.value);
> - node->synchronized = FALSE;
> node->saAmfNodeOperState = SA_AMF_OPERATIONAL_DISABLED;
> + node->nodeid = 0;
> }
>
> static void amf_node_acsm_enter_failing_over (struct amf_node *node)
> @@ -160,10 +160,10 @@
> */
> void amf_node_leave (struct amf_node *node)
> {
> - ENTER("'%s'", node->name.value);
> + ENTER("'%s', CLM node '%s'", node->name.value,
> + node->saAmfNodeClmNode.value);
> amf_node_acsm_enter_leaving_spontaneously(node);
> amf_node_acsm_enter_failing_over (node);
> -
> }
>
> /**
> @@ -297,19 +297,15 @@
> {
> struct amf_node *node;
>
> - if (amf_cluster == NULL) {
> - return NULL;
> - }
> + assert (name != NULL && amf_cluster != NULL);
>
> - assert (name != NULL);
> -
> for (node = amf_cluster->node_head; node != NULL; node = node->next) {
> if (name_match (&node->name, name)) {
> return node;
> }
> }
>
> - dprintf ("node %s not found!", name->value);
> + dprintf ("node %s not found in configuration!", name->value);
>
> return NULL;
> }
> @@ -318,17 +314,36 @@
> {
> struct amf_node *node;
>
> + assert (amf_cluster != NULL);
> +
> for (node = amf_cluster->node_head; node != NULL; node = node->next) {
> if (node->nodeid == nodeid) {
> return node;
> }
> }
>
> - dprintf ("node %u not found!", nodeid);
> + dprintf ("node %u not found in configuration!", nodeid);
>
> return NULL;
> }
>
> +struct amf_node *amf_node_find_by_hostname (const char *hostname)
> +{
> + struct amf_node *node;
> +
> + assert (hostname != NULL && amf_cluster != NULL);
> +
> + for (node = amf_cluster->node_head; node != NULL; node = node->next) {
> + if (strcmp ((char*)node->saAmfNodeClmNode.value, hostname) == 0) {
> + return node;
> + }
> + }
> +
> + dprintf ("node %s not found in configuration!", hostname);
> +
> + return NULL;
> +}
> +
> static int all_applications_on_node_started (struct amf_node *node,
> struct amf_cluster *cluster)
> {
> Index: exec/amfutil.c
> ===================================================================
> --- exec/amfutil.c (revision 1232)
> +++ exec/amfutil.c (working copy)
> @@ -62,11 +62,16 @@
> #include "print.h"
> #include "aispoll.h"
> #include "main.h"
> +#include "service.h"
>
> #ifndef OPENAIS_CLUSTER_STARTUP_TIMEOUT
> #define OPENAIS_CLUSTER_STARTUP_TIMEOUT 5000
> #endif
>
> +struct req_exec_amf_msg {
> + mar_req_header_t header;
> +};
> +
> static const char *presence_state_text[] = {
> "UNKNOWN",
> "UNINSTANTIATED",
> @@ -346,10 +351,12 @@
> break;
>
> case AMF_NODE:
> - if ((loc = strstr_rs (line, "saAmfNodeSuFailOverProb")) != 0) {
> + if ((loc = strstr_rs (line, "saAmfNodeSuFailOverProb=")) != 0) {
> node->saAmfNodeSuFailOverProb = atol(loc);
> - } else if ((loc = strstr_rs (line, "saAmfNodeSuFailoverMax")) != 0) {
> + } else if ((loc = strstr_rs (line, "saAmfNodeSuFailoverMax=")) != 0) {
> node->saAmfNodeSuFailoverMax = atol(loc);
> + } else if ((loc = strstr_rs (line, "saAmfNodeClmNode=")) != 0) {
> + setSaNameT (&node->saAmfNodeClmNode, trim_str (loc));
> } else if ((loc = strstr_rs (line, "saAmfNodeAutoRepair=")) != 0) {
> if (strcmp (loc, "true") == 0) {
> node->saAmfNodeAutoRepair = SA_TRUE;
> @@ -383,6 +390,10 @@
> error_reason = "saAmfNodeSuFailoverMax missing";
> goto parse_error;
> }
> + if (node->saAmfNodeClmNode.length == 0) {
> + error_reason = "saAmfNodeClmNode missing";
> + goto parse_error;
> + }
> current_parse = AMF_CLUSTER;
> } else {
> goto parse_error;
> @@ -902,12 +913,16 @@
> log_printf (LOG_INFO, "safCluster=%s", getSaNameT(&cluster->name));
> log_printf (LOG_INFO, " admin state: %s\n",
> admin_state_text[cluster->saAmfClusterAdminState]);
> + log_printf (LOG_INFO, " state: %u\n", cluster->state);
> for (node = cluster->node_head; node != NULL; node = node->next) {
> log_printf (LOG_INFO, " safNode=%s\n", getSaNameT (&node->name));
> + log_printf (LOG_INFO, " CLM Node: %s\n", getSaNameT (&node->saAmfNodeClmNode));
> + log_printf (LOG_INFO, " node ID: %u\n", node->nodeid);
> log_printf (LOG_INFO, " admin state: %s\n",
> admin_state_text[node->saAmfNodeAdminState]);
> log_printf (LOG_INFO, " oper state: %s\n",
> oper_state_text[node->saAmfNodeOperState]);
> + log_printf (LOG_INFO, " acsm state: %u\n", node->acsm_state);
> }
> for (app = cluster->application_head; app != NULL; app = app->next) {
> log_printf (LOG_INFO, " safApp=%s\n", getSaNameT(&app->name));
> @@ -916,6 +931,8 @@
> log_printf (LOG_INFO, " num_sg: %d\n", app->saAmfApplicationCurrNumSG);
> for (sg = app->sg_head; sg != NULL; sg = sg->next) {
> log_printf (LOG_INFO, " safSg=%s\n", getSaNameT(&sg->name));
> + log_printf (LOG_INFO, " avail_state: %u\n",
> + sg->avail_state);
> log_printf (LOG_INFO, " admin state: %s\n",
> admin_state_text[sg->saAmfSGAdminState]);
> log_printf (LOG_INFO, " assigned SUs %d\n",
> @@ -1272,4 +1289,40 @@
>
> }
>
> +void amf_msg_mcast (int id, void *buf, size_t len)
> +{
> + struct req_exec_amf_msg msg;
> + struct iovec iov[2];
> + int iov_cnt;
> + int res;
>
> +// ENTER ("%u, %p, %u", id, buf, len);
> +
> + msg.header.size = sizeof (msg);
> + msg.header.id = SERVICE_ID_MAKE (AMF_SERVICE, id);
> + iov[0].iov_base = &msg;
> + iov[0].iov_len = sizeof (msg);
> +
> + if (buf == NULL) {
> + msg.header.size = sizeof (msg);
> + iov_cnt = 1;
> + } else {
> + msg.header.size = sizeof (msg) + len;
> + iov[1].iov_base = buf;
> + iov[1].iov_len = len;
> + iov_cnt = 2;
> + }
> +
> + res = totempg_groups_mcast_joined (
> + openais_group_handle, iov, iov_cnt, TOTEMPG_AGREED);
> +
> + if (res != 0) {
> + dprintf("Unable to send %d bytes\n", msg.header.size);
> + openais_exit_error (AIS_DONE_FATAL_ERR);
> + }
> +}
> +
> +void amf_util_init (void)
> +{
> + log_init ("AMF");
> +}
> Index: exec/amf.c
> ===================================================================
> --- exec/amf.c (revision 1233)
> +++ exec/amf.c (working copy)
> @@ -71,10 +71,12 @@
> * IDLE node_joined PROBING-1
> * PROBING-1 timer1 timeout A1 PROBING-2
> * PROBING-1 SYNC_START A2 UPDATING_CLUSTER_MODEL
> + * PROBING-1 node_joined A7 PROBING-1
> * PROBING-2 SYNC_START[From me] CREATING_CLUSTER_MODEL
> * PROBING-2 SYNC_START[From other] UPDATING_CLUSTER_MODEL
> - * CREATING_CLUSTER_MODEL Model created SYNCHRONIZING
> - * SYNCHRONIZING SYNC_READY NORMAL_OPERATION
> + * PROBING-2 node_joined A7 PROBING-2
> + * CREATING_CLUSTER_MODEL Model created A8 SYNCHRONIZING
> + * SYNCHRONIZING SYNC_READY A10 NORMAL_OPERATION
> * SYNCHRONIZING node_left[sync_master] A5 SYNCHRONIZING
> * SYNCHRONIZING node_joined SYNCHRONIZING
> * UPDATING_CLUSTER_MODEL SYNC_DATA A3 UPDATING_CLUSTER_MODEL
> @@ -82,7 +84,9 @@
> * UPDATING_CLUSTER_MODEL SYNC_START A5 NORMAL_OPERATION
> * UPDATING_CLUSTER_MODEL node_left[sync_master] PROBING-1
> * NORMAL_OPERATION node_joined SYNCHRONIZING
> - * NORMAL_OPERATION node_left[sync_master] A6 NORMAL_OPERATION
> + * NORMAL_OPERATION node_left[sync_master] A6 NORMAL_OPERATION
> + * NORMAL_OPERATION SYNC_REQUEST A8 NORMAL_OPERATION
> + * Any SYNC_REQUEST A9 No change
> *
> * 1.2 State Description
> * =====================
> @@ -105,7 +109,10 @@
> * A4 - Create cluster model
> * A5 - Free received SYNC_DATA
> * A6 - Calculate new sync master
> - *
> + * A7 - Multicast SYNC_REQUEST message
> + * A8 - Update AMF node object(s) with CLM nodeid
> + * A9 - Save CLM nodeid & hostname
> + * A10- Delete CLM nodes
> */
>
> #include <sys/types.h>
> @@ -144,10 +151,6 @@
> #include "objdb.h"
> #include "print.h"
>
> -#define LOG_LEVEL_FROM_LIB LOG_LEVEL_DEBUG
> -#define LOG_LEVEL_FROM_GMI LOG_LEVEL_DEBUG
> -#define LOG_LEVEL_ENTER_FUNC LOG_LEVEL_DEBUG
> -
> #ifndef HOST_NAME_MAX
> # define HOST_NAME_MAX 255
> #endif
> @@ -206,6 +209,8 @@
> void *message, unsigned int nodeid);
> static void message_handler_req_exec_amf_cluster_start_tmo (
> void *message, unsigned int nodeid);
> +static void message_handler_req_exec_amf_sync_request (
> + void *message, unsigned int nodeid);
> static void amf_dump_fn (void);
> static void amf_sync_init (void);
> static int amf_sync_process (void);
> @@ -338,7 +343,10 @@
> },
> {
> .exec_handler_fn = message_handler_req_exec_amf_cluster_start_tmo,
> - }
> + },
> + {
> + .exec_handler_fn = message_handler_req_exec_amf_sync_request,
> + },
> };
>
> /*
> @@ -429,6 +437,11 @@
> amf_object_type_t object_type;
> };
>
> +struct req_exec_amf_sync_request {
> + mar_req_header_t header;
> + char hostname[HOST_NAME_MAX + 1];
> +};
> +
> static const char *scsm_state_names[] = {
> "Unknown",
> "IDLE",
> @@ -463,7 +476,6 @@
> /* node ID of current sync master */
> unsigned int sync_master;
>
> - unsigned int *member_list;
> unsigned int *joined_list;
> unsigned int joined_list_entries;
> struct amf_cluster *cluster;
> @@ -491,34 +503,82 @@
> */
> static struct scsm_descriptor scsm;
>
> -/* IMPL */
> -static char *hostname_get (unsigned int nodeid)
> +typedef struct clm_node {
> + unsigned int nodeid;
> + char hostname[HOST_NAME_MAX + 1];
> + struct clm_node *next;
> +} clm_node_t;
> +
> +static char hostname[HOST_NAME_MAX + 1];
> +
> +/*
> + * Nodes in the cluster, only used for initial start
> + * since before the AMF node object exist, we don't
> + * have storage for the information received in
> + * SYNC_REQUEST msg.
> + */
> +static clm_node_t *clm_nodes;
> +
> +/******************************************************************************
> + * Internal (static) utility functions
> + *****************************************************************************/
> +
> +/**
> + * Find a CLM node object using nodeid as query. Allocate and
> + * return new object if not found.
> + *
> + * @param nodeid
> + *
> + * @return clm_node_t*
> + */
> +static clm_node_t *clm_node_find_by_nodeid (unsigned int nodeid)
> {
> - struct totem_ip_address interfaces[INTERFACE_MAX];
> - char **status;
> - unsigned int iface_count;
> - int res;
> - struct hostent *ent;
> -
> - res = totempg_ifaces_get (nodeid, interfaces, &status, &iface_count);
> - if (res == -1) {
> - log_printf (LOG_LEVEL_ERROR, "totempg_ifaces_get failed for %u", nodeid);
> - openais_exit_error (AIS_DONE_FATAL_ERR);
> - }
> - if (iface_count > 0) {
> - ent = gethostbyaddr (interfaces[0].addr, 4, interfaces[0].family);
> - if (ent == NULL) {
> - log_printf (LOG_LEVEL_ERROR, "gethostbyaddr failed: %d\n", h_errno);
> - openais_exit_error (AIS_DONE_FATAL_ERR);
> - }
> -
> - return ent->h_name;
> - }
> -
> - return NULL;
> + clm_node_t *clm_node;
> +
> + for (clm_node = clm_nodes; clm_node != NULL; clm_node = clm_node->next) {
> + if (clm_node->nodeid == nodeid) {
> + return clm_node;
> + }
> + }
> +
> + clm_node = amf_malloc (sizeof (clm_node_t));
> + clm_node->nodeid = nodeid;
> + clm_node->next = clm_nodes;
> + clm_nodes = clm_node;
> +
> + return clm_node;
> }
>
> /**
> + * Init nodeids in the AMF node objects using information in the
> + * CLM node objects.
> + */
> +static void nodeids_init (void)
> +{
> + amf_node_t *amf_node;
> + clm_node_t *clm_node;
> +
> + ENTER ("");
> +
> + for (clm_node = clm_nodes; clm_node != NULL; clm_node = clm_node->next) {
> + /*
> + * Iterate all AMF nodes if several AMF nodes are mapped to this
> + * particular CLM node.
> + */
> + for (amf_node = amf_cluster->node_head; amf_node != NULL;
> + amf_node = amf_node->next) {
> +
> + if (strcmp ((char*)amf_node->saAmfNodeClmNode.value,
> + clm_node->hostname) == 0) {
> +
> + dprintf ("%s id set to %u", amf_node->name.value, clm_node->nodeid);
> + amf_node->nodeid = clm_node->nodeid;
> + }
> + }
> + }
> +}
> +
> +/**
> * Return pointer to this node object.
> *
> * @param cluster
> @@ -527,16 +587,16 @@
> */
> static struct amf_node *get_this_node_obj (struct amf_cluster *cluster)
> {
> - SaClmClusterNodeT *clm_node = main_clm_get_by_nodeid (SA_CLM_LOCAL_NODE_ID);
> - char *hostname;
> - SaNameT name;
> + char hostname[HOST_NAME_MAX + 1];
>
> - assert (clm_node != NULL);
> - hostname = hostname_get (clm_node->nodeId);
> - assert (hostname != NULL);
> - setSaNameT (&name, hostname);
> + assert (cluster != NULL);
>
> - return amf_node_find (&name);
> + if (gethostname (hostname, sizeof(hostname)) == -1) {
> + log_printf (LOG_LEVEL_ERROR, "gethostname failed: %d", errno);
> + openais_exit_error (AIS_DONE_FATAL_ERR);
> + }
> +
> + return amf_node_find_by_hostname (hostname);
> }
>
> /**
> @@ -551,37 +611,6 @@
> }
>
> /**
> - * Multicast SYNC_START message
> - *
> - * @return int
> - */
> -static int mcast_sync_start (void)
> -{
> - struct req_exec_amf_sync_data req_exec;
> - struct iovec iov[1];
> - int res;
> -
> - SYNCTRACE ("state %s", scsm_state_names[scsm.state]);
> -
> - req_exec.header.size = sizeof (struct req_exec_amf_sync_data);
> - req_exec.header.id =
> - SERVICE_ID_MAKE (AMF_SERVICE, MESSAGE_REQ_EXEC_AMF_SYNC_START);
> -
> - iov[0].iov_base = &req_exec;
> - iov[0].iov_len = sizeof (struct req_exec_amf_sync_data);
> -
> - res = totempg_groups_mcast_joined (
> - openais_group_handle, iov, 1, TOTEMPG_AGREED);
> -
> - if (res != 0) {
> - dprintf("Unable to send %d bytes of sync data\n", req_exec.header.size);
> - openais_exit_error (AIS_DONE_FATAL_ERR);
> - }
> -
> - return res;
> -}
> -
> -/**
> * Multicast SYNC_DATA message containing a model object.
> *
> * @param buf
> @@ -620,34 +649,6 @@
> }
>
> /**
> - * Multicast SYNC_READY message
> - */
> -static void mcast_sync_ready (void)
> -{
> - struct req_exec_amf_sync_data req_exec;
> - struct iovec iov[1];
> - int res;
> -
> - SYNCTRACE ("state %s", scsm_state_names[scsm.state]);
> -
> -
> - req_exec.header.size = sizeof (struct req_exec_amf_sync_data);
> - req_exec.header.id =
> - SERVICE_ID_MAKE (AMF_SERVICE, MESSAGE_REQ_EXEC_AMF_SYNC_READY);
> -
> - iov[0].iov_base = &req_exec;
> - iov[0].iov_len = sizeof (struct req_exec_amf_sync_data);
> -
> - res = totempg_groups_mcast_joined (
> - openais_group_handle, iov, 1, TOTEMPG_AGREED);
> -
> - if (res != 0) {
> - dprintf("Unable to send %d bytes of sync data\n", req_exec.header.size);
> - openais_exit_error (AIS_DONE_FATAL_ERR);
> - }
> -}
> -
> -/**
> * Timer callback function. The time waiting for external
> * synchronisation has expired, start competing with other
> * nodes to determine who should read config file.
> @@ -656,9 +657,7 @@
> static void timer_function_scsm_timer1_tmo (void *data)
> {
> SYNCTRACE ("");
> - if (mcast_sync_start () != 0) {
> - openais_exit_error (AIS_DONE_FATAL_ERR);
> - }
> + amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_SYNC_START, NULL, 0);
> sync_state_set (PROBING_2);
> }
>
> @@ -686,7 +685,7 @@
> }
> } while (res != 0);
>
> - mcast_sync_ready ();
> + amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_SYNC_READY, NULL, 0);
> }
>
> /**
> @@ -708,10 +707,12 @@
>
> if (this_amf_node == NULL) {
> log_printf (LOG_LEVEL_INFO,
> - "This node is not configured as an AMF node, disabling.");
> + "Info: This node is not configured as an AMF node, disabling.");
> return -1;
> }
>
> + this_amf_node->nodeid = this_ip->nodeid;
> +
> return 0;
> }
>
> @@ -1120,46 +1121,26 @@
> return 0;
> }
>
> +/**
> + * Start the AMF nodes that have joined
> + */
> static void joined_nodes_start (void)
> {
> int i;
> + struct amf_node *node;
>
> for (i = 0; i < scsm.joined_list_entries; i++) {
> - SaNameT name;
> - struct amf_node *node;
> + node = amf_node_find_by_nodeid (scsm.joined_list[i]);
>
> - setSaNameT (&name, hostname_get (scsm.joined_list[i]));
> - node = amf_node_find (&name);
> if (node != NULL) {
> - node->nodeid = scsm.joined_list[i];
> amf_node_sync_ready (node);
> } else {
> log_printf (LOG_LEVEL_INFO,
> - "Node %s is not configured as an AMF node", name.value);
> + "Info: Node %u is not configured as an AMF node", scsm.joined_list[i]);
> }
> }
> }
>
> -static void init_nodeids (void)
> -{
> - int i;
> -
> - ENTER ("");
> -
> - for (i = 0; scsm.member_list[i] != 0; i++) {
> - SaNameT name;
> - struct amf_node *node;
> -
> - setSaNameT (&name, hostname_get (scsm.member_list[i]));
> - node = amf_node_find (&name);
> -
> - assert (node != NULL);
> - node->nodeid = scsm.member_list[i];
> - }
> -
> - LEAVE ("");
> -}
> -
> /******************************************************************************
> * AMF Framework callback implementation *
> *****************************************************************************/
> @@ -1167,6 +1148,7 @@
> static void amf_sync_init (void)
> {
> SYNCTRACE ("state %s", scsm_state_names[scsm.state]);
> +
> switch (scsm.state) {
> case UNCONFIGURED:
> case PROBING_1:
> @@ -1184,8 +1166,9 @@
> }
>
> if (scsm.state == SYNCHRONIZING && scsm.sync_master == this_ip->nodeid) {
> - mcast_sync_start ();
> + amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_SYNC_START, NULL, 0);
> assert (amf_cluster != NULL);
> + nodeids_init ();
> scsm.cluster = amf_cluster;
> scsm.node = amf_cluster->node_head;
> scsm.app = amf_cluster->application_head;
> @@ -1271,23 +1254,32 @@
> }
>
> /**
> - * SCSM normal exit function for state SYNCHRONIZING
> - * All synced objects are now commited, start node/cluster.
> + * SCSM normal exit function for states SYNCHRONIZING &
> + * UPDATING_CLUSTER_MODEL. All synced objects are now
> + * committed, start node/cluster.
> */
> static void amf_sync_activate (void)
> {
> + clm_node_t *clm_node = clm_nodes;
> +
> SYNCTRACE ("state %s", scsm_state_names[scsm.state]);
>
> switch (scsm.state) {
> case SYNCHRONIZING:
> + /* Delete all CLM nodes, not needed any longer. */
> + while (clm_node != NULL) {
> + clm_node_t *tmp = clm_node;
> + clm_node = clm_node->next;
> + free (tmp);
> + }
> + clm_nodes = NULL;
> sync_state_set (NORMAL_OPERATION);
> - init_nodeids ();
> /* TODO: Remove dependencies to amf_cluster->state */
> switch (amf_cluster->state) {
> - case CLUSTER_STARTED: {
> + case CLUSTER_STARTED:
> + case CLUSTER_STARTING_WORKLOAD:
> joined_nodes_start ();
> break;
> - }
> case CLUSTER_STARTING_COMPONENTS: {
> amf_cluster_sync_ready (amf_cluster);
> joined_nodes_start ();
> @@ -1305,14 +1297,15 @@
> scsm.cluster = NULL;
> this_amf_node = get_this_node_obj (amf_cluster);
> sync_state_set (NORMAL_OPERATION);
> - init_nodeids ();
> if (this_amf_node != NULL) {
> + this_amf_node->nodeid = this_ip->nodeid;
> #ifdef AMF_DEBUG
> amf_runtime_attributes_print (amf_cluster);
> #endif
> /* TODO: Remove dependencies to amf_cluster->state */
> switch (amf_cluster->state) {
> case CLUSTER_STARTED: {
> + case CLUSTER_STARTING_WORKLOAD:
> amf_node_sync_ready (this_amf_node);
> break;
> }
> @@ -1328,7 +1321,7 @@
> }
> } else {
> log_printf (LOG_LEVEL_INFO,
> - "This node is not configured as an AMF node, disabling.");
> + "Info: This node is not configured as an AMF node, disabling.");
> sync_state_set (UNCONFIGURED);
> }
> break;
> @@ -1341,7 +1334,7 @@
> assert (0);
> }
>
> - LEAVE_VOID ();
> + SYNCTRACE ("");
> }
>
> /**
> @@ -1355,6 +1348,11 @@
> {
> log_init ("AMF");
>
> + if (gethostname (hostname, sizeof (hostname)) == -1) {
> + log_printf (LOG_LEVEL_ERROR, "gethostname failed: %d", errno);
> + openais_exit_error (AIS_DONE_FATAL_ERR);
> + }
> +
> if (!amf_enabled (objdb)) {
> sync_state_set (UNCONFIGURED);
> return 0;
> @@ -1369,6 +1367,7 @@
> amf_su_init();
> amf_comp_init();
> amf_si_init();
> + amf_util_init ();
>
> return (0);
> }
> @@ -1391,34 +1390,21 @@
> unsigned int *joined_list, int joined_list_entries,
> struct memb_ring_id *ring_id)
> {
> - unsigned int i;
> -
> ENTER ("mnum: %d, jnum: %d, lnum: %d, sync state: %s, ring ID %llu rep %s\n",
> member_list_entries, joined_list_entries, left_list_entries,
> scsm_state_names[scsm.state], ring_id->seq, totemip_print (&ring_id->rep));
>
> + /*
> + * Save nodes that joined, needed to initialize each
> + * node's totem node id later.
> + */
> scsm.joined_list_entries = joined_list_entries;
> if (scsm.joined_list != NULL) {
> free (scsm.joined_list);
> }
> scsm.joined_list = amf_malloc (joined_list_entries * sizeof (unsigned int));
> - for (i = 0; i < joined_list_entries; i++) {
> - scsm.joined_list[i] = joined_list[i];
> - }
> + memcpy (scsm.joined_list, joined_list, sizeof (unsigned int) * joined_list_entries);
>
> - /**
> - * Save current members of the cluster, needed to initialize
> - * each node's totem node id later.
> - */
> - if (scsm.member_list != NULL) {
> - free (scsm.member_list);
> - }
> - scsm.member_list = amf_malloc ((member_list_entries + 1) * sizeof (unsigned int));
> - for (i = 0; i < member_list_entries; i++) {
> - scsm.member_list[i] = member_list[i];
> - }
> - scsm.member_list[i] = 0;
> -
> switch (scsm.state) {
> case IDLE: {
> sync_state_set (PROBING_1);
> @@ -1432,7 +1418,11 @@
> case PROBING_1:
> /* fall-through */
> case PROBING_2:
> - /* fall-through */
> + if (joined_list_entries > 0) {
> + amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_SYNC_REQUEST,
> + hostname, strlen (hostname) + 1);
> + }
> + break;
> case UNCONFIGURED:
> break;
> case UPDATING_CLUSTER_MODEL:
> @@ -1453,11 +1443,11 @@
> case SYNCHRONIZING: {
> if (joined_list_entries > 0 && scsm.sync_master == this_ip->nodeid) {
> /* restart sync */
> - mcast_sync_start ();
> + amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_SYNC_START, NULL, 0);
> }
> - /** If the sync master left the cluster, calculate a new sync
> - * master between the remaining nodes in the cluster excluding
> - * the nodes we are just syncing.
> + /* If the sync master left the cluster, calculate a new sync
> + * master between the remaining nodes in the cluster excluding
> + * the nodes we are just syncing.
> */
> if (!is_member (scsm.sync_master, member_list, member_list_entries)) {
> scsm.sync_master =
> @@ -1468,14 +1458,14 @@
> if (scsm.sync_master == this_ip->nodeid) {
> /* restart sync */
> SYNCTRACE ("I am (new) sync master");
> - mcast_sync_start ();
> + amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_SYNC_START, NULL, 0);
> }
> }
> break;
> }
> case NORMAL_OPERATION: {
> - /** If the sync master left the cluster, calculate a new sync
> - * master between the remaining nodes in the cluster.
> + /* If the sync master left the cluster, calculate a new sync
> + * master between the remaining nodes in the cluster.
> */
> if (!is_member (scsm.sync_master, member_list, member_list_entries)) {
> scsm.sync_master =
> @@ -1493,8 +1483,9 @@
>
> for (i = 0; i < left_list_entries; i++) {
> node = amf_node_find_by_nodeid (left_list[i]);
> - assert (node != NULL);
> - amf_node_leave(node);
> + if (node != NULL) {
> + amf_node_leave(node);
> + }
> }
> }
> break;
> @@ -1552,7 +1543,7 @@
>
> comp = amf_comp_find (amf_cluster, &req_exec->compName);
> assert (comp != NULL);
> - ENTER ("'%s'", comp->name.value);
> + TRACE1 ("ComponentRegister: '%s'", comp->name.value);
> error = amf_comp_register (comp);
>
> if (amf_su_is_local (comp->su)) {
> @@ -1591,7 +1582,7 @@
>
> comp = amf_comp_find (amf_cluster, &req_exec->compName);
> if (comp == NULL) {
> - log_printf (LOG_ERR, "'%s' not found", req_exec->compName.value);
> + log_printf (LOG_ERR, "Error: '%s' not found", req_exec->compName.value);
> return;
> }
>
> @@ -1611,7 +1602,7 @@
>
> comp = amf_comp_find (amf_cluster, &req_exec->compName);
> if (comp == NULL) {
> - log_printf (LOG_ERR, "'%s' not found", req_exec->compName.value);
> + log_printf (LOG_ERR, "Error: '%s' not found", req_exec->compName.value);
> return;
> }
>
> @@ -1634,7 +1625,7 @@
> return;
> }
>
> - ENTER ("%s", req_exec->dn.value);
> + TRACE1 ("AmfResponse: %s", req_exec->dn.value);
>
> comp = amf_comp_response_2 (
> req_exec->interface, &req_exec->dn, req_exec->error, &retval);
> @@ -1834,6 +1825,35 @@
> }
> }
>
> +static void message_handler_req_exec_amf_sync_request (
> + void *message, unsigned int nodeid)
> +{
> + struct req_exec_amf_sync_request *req_exec = message;
> + clm_node_t *clm_node;
> +
> + SYNCTRACE ("from: %s, name: %s, state %s", totempg_ifaces_print (nodeid),
> + req_exec->hostname, scsm_state_names[scsm.state]);
> +
> + clm_node = clm_node_find_by_nodeid (nodeid);
> + assert (clm_node != NULL);
> + strcpy (clm_node->hostname, req_exec->hostname);
> +
> + if (scsm.state == NORMAL_OPERATION) {
> + amf_node_t *amf_node = amf_cluster->node_head;
> + /*
> + * Iterate all AMF nodes if several AMF nodes are mapped to this
> + * particular CLM node.
> + */
> + for (; amf_node != NULL; amf_node = amf_node->next) {
> + if (strcmp ((char*)amf_node->saAmfNodeClmNode.value,
> + req_exec->hostname) == 0) {
> +
> + amf_node->nodeid = nodeid;
> + }
> + }
> + }
> +}
> +
> /*****************************************************************************
> * Library Interface Implementation
> ****************************************************************************/
> @@ -1853,7 +1873,7 @@
> struct iovec iovec;
> struct amf_pd *amf_pd = openais_conn_private_data_get (conn);
>
> - TRACE2("Lib comp register '%s'", req_lib->compName.value);
> + TRACE2("Comp register '%s'", req_lib->compName.value);
> comp->conn = conn;
> amf_pd->comp = comp;
> req_exec.header.size = sizeof (struct req_exec_amf_comp_register);
> @@ -1868,7 +1888,7 @@
> &iovec, 1, TOTEMPG_AGREED) == 0);
> } else {
> struct res_lib_amf_componentregister res_lib;
> - log_printf (LOG_ERR, "Lib comp register: comp '%s' not found", req_lib->compName.value);
> + log_printf (LOG_ERR, "Error: Comp register: '%s' not found", req_lib->compName.value);
> res_lib.header.id = MESSAGE_RES_AMF_COMPONENTREGISTER;
> res_lib.header.size = sizeof (struct res_lib_amf_componentregister);
> res_lib.header.error = SA_AIS_ERR_INVALID_PARAM;
> @@ -1931,8 +1951,6 @@
> struct amf_comp *comp;
> SaAisErrorT error = SA_AIS_OK;
>
> - assert (scsm.state == NORMAL_OPERATION);
> -
> comp = amf_comp_find (amf_cluster, &req_lib->compName);
>
> if (comp != NULL) {
> @@ -1961,8 +1979,6 @@
> struct amf_comp *comp;
> SaAisErrorT error = SA_AIS_OK;
>
> - assert (scsm.state == NORMAL_OPERATION);
> -
> comp = amf_comp_find (amf_cluster, &req_lib->compName);
> if (comp != NULL) {
> error = amf_comp_healthcheck_confirm (
> @@ -1987,8 +2003,6 @@
> struct amf_comp *comp;
> SaAisErrorT error = SA_AIS_OK;
>
> - assert (scsm.state == NORMAL_OPERATION);
> -
> comp = amf_comp_find (amf_cluster, &req_lib->compName);
> if (comp != NULL) {
> error = amf_comp_healthcheck_stop (comp, &req_lib->healthcheckKey);
> @@ -2012,8 +2026,6 @@
> SaAmfHAStateT ha_state;
> SaAisErrorT error;
>
> - assert (scsm.state == NORMAL_OPERATION);
> -
> comp = amf_comp_find (amf_cluster, &req_lib->compName);
> if (comp != NULL) {
> error = amf_comp_hastate_get (comp, &req_lib->csiName, &ha_state);
> @@ -2305,6 +2317,8 @@
> res_lib.header.size = sizeof (struct res_lib_amf_response);
> res_lib.header.error = retval;
>
> +// ENTER ("");
> +
> if (openais_conn_send_response (conn, &res_lib, sizeof (res_lib)) != 0) {
> openais_exit_error (AIS_DONE_FATAL_ERR);
> }
> Index: exec/amf.h
> ===================================================================
> --- exec/amf.h (revision 1232)
> +++ exec/amf.h (working copy)
> @@ -172,7 +172,7 @@
> enum cluster_states state;
> };
>
> -struct amf_node {
> +typedef struct amf_node {
> /* Configuration Attributes */
> SaNameT name;
> SaNameT saAmfNodeClmNode;
> @@ -193,8 +193,7 @@
> unsigned int nodeid;
> struct amf_node *next;
> amf_node_acsm_state_t acsm_state;
> - int synchronized;
> -};
> +} amf_node_t;
>
> struct amf_application {
> /* Configuration Attributes */
> @@ -501,7 +500,8 @@
> MESSAGE_REQ_EXEC_AMF_SYNC_START = 5,
> MESSAGE_REQ_EXEC_AMF_SYNC_DATA = 6,
> MESSAGE_REQ_EXEC_AMF_SYNC_READY = 7,
> - MESSAGE_REQ_EXEC_AMF_CLUSTER_START_TMO = 8
> + MESSAGE_REQ_EXEC_AMF_CLUSTER_START_TMO = 8,
> + MESSAGE_REQ_EXEC_AMF_SYNC_REQUEST = 9
> };
>
> struct req_exec_amf_clc_cleanup_completed {
> @@ -535,7 +535,6 @@
> extern const char *amf_ha_state (int state);
> extern const char *amf_readiness_state (int state);
> extern const char *amf_assignment_state (int state);
> -extern struct amf_node *amf_node_find_by_nodeid (unsigned int nodeid);
> extern char *amf_serialize_SaNameT (
> char *buf, int *size, int *offset, SaNameT *name);
> extern char *amf_serialize_SaStringT (
> @@ -550,6 +549,8 @@
> extern char *amf_deserialize_SaUint32T (char *buf, SaUint32T *num);
> extern char *amf_deserialize_SaUint64T (char *buf, SaUint64T *num);
> extern char *amf_deserialize_opaque (char *buf, char *dst, int *cnt);
> +extern void amf_msg_mcast (int id, void *buf, size_t len);
> +extern void amf_util_init (void);
>
> /*===========================================================================*/
> /* amfnode.c */
> @@ -561,6 +562,8 @@
> extern struct amf_node *amf_node_deserialize (
> struct amf_cluster *cluster, char *buf, int size);
> extern struct amf_node *amf_node_find (SaNameT *name);
> +extern struct amf_node *amf_node_find_by_nodeid (unsigned int nodeid);
> +extern struct amf_node *amf_node_find_by_hostname (const char *hostname);
>
> /* Event methods */
> extern void amf_node_sync_ready (struct amf_node *node);
> Index: conf/amf.conf
> ===================================================================
> --- conf/amf.conf (revision 1229)
> +++ conf/amf.conf (working copy)
> @@ -7,10 +7,16 @@
>
> safAmfCluster = TEST_CLUSTER {
> saAmfClusterStartupTimeout=3000
> - safAmfNode = seasc0036 {
> + safAmfNode = AMF1 {
> saAmfNodeSuFailOverProb=2000
> saAmfNodeSuFailoverMax=2
> + saAmfNodeClmNode=p01
> }
> + safAmfNode = AMF2 {
> + saAmfNodeSuFailOverProb=2000
> + saAmfNodeSuFailoverMax=2
> + saAmfNodeClmNode=p02
> + }
> safApp = APP-1 {
> safSg = RAID {
> saAmfSGRedundancyModel=nplusm
> @@ -24,7 +30,7 @@
> saAmfSGSuRestartMax=1
> saAmfSGAutoAdjustProb=5000
> safSu = SERVICE_X_1 {
> - saAmfSUHostedByNode=seasc0036
> + saAmfSUHostedByNode=AMF1
> saAmfSUNumComponents=1
> safComp = A {
> saAmfCompCategory=sa_aware
> @@ -81,7 +87,7 @@
> }
> safSu = SERVICE_X_2 {
> clccli_path=/tmp/aistest
> - saAmfSUHostedByNode=seasc0036
> + saAmfSUHostedByNode=AMF2
> saAmfSUNumComponents=1
> safComp = A {
> saAmfCompCategory=sa_aware
>
>
> ------------------------------------------------------------------------
>
> _______________________________________________
> Openais mailing list
> Openais at lists.osdl.org
> https://lists.osdl.org/mailman/listinfo/openais
More information about the Openais
mailing list