{"heat_template_version": "wallaby", "description": "Pacemaker service configured with Puppet\n", "parameters": {"ServiceData": {"default": {}, "description": "Dictionary packing service data", "type": "json"}, "ServiceNetMap": {"default": {}, "description": "Mapping of service_name -> network name. Typically set via parameter_defaults in the resource registry. Use parameter_merge_strategies to merge it with the defaults.", "type": "json"}, "RoleName": {"default": "", "description": "Role name on which the service is applied", "type": "string"}, "RoleParameters": {"default": {}, "description": "Parameters specific to the role", "type": "json"}, "EndpointMap": {"default": {}, "description": "Mapping of service endpoint -> protocol. Typically set via parameter_defaults in the resource registry.", "type": "json"}, "MonitoringSubscriptionPacemaker": {"default": "overcloud-pacemaker", "type": "string"}, "CorosyncIPv6": {"default": false, "description": "Enable IPv6 in Corosync", "type": "boolean"}, "EnableFencing": {"default": false, "description": "Whether to enable fencing in Pacemaker or not.", "type": "boolean"}, "PacemakerTLSPriorities": {"type": "string", "description": "Pacemaker TLS Priorities", "default": ""}, "PacemakerRemoteAuthkey": {"type": "string", "description": "The authkey for the pacemaker remote service.", "hidden": true}, "PcsdPassword": {"type": "string", "description": "The password for the 'pcsd' user for pacemaker.", "hidden": true}, "CorosyncTokenTimeout": {"type": "number", "description": "Time in milliseconds until a token loss is declared after not receiving a token.", "default": 10000}, "CorosyncSettleTries": {"type": "number", "description": "Number of tries for cluster settling. This has the same default as the pacemaker puppet module. Override to a smaller value when in need to replace a controller node.", "default": 360}, "FencingConfig": {"default": {}, "description": "Pacemaker fencing configuration. 
The JSON should have\nthe following structure:\n {\n \"devices\": [\n {\n \"agent\": \"AGENT_NAME\",\n \"host_mac\": \"HOST_MAC_ADDRESS\",\n \"params\": {\"PARAM_NAME\": \"PARAM_VALUE\"}\n }\n ]\n }\nFor instance:\n {\n \"devices\": [\n {\n \"agent\": \"fence_xvm\",\n \"host_mac\": \"52:54:00:aa:bb:cc\",\n \"params\": {\n \"multicast_address\": \"225.0.0.12\",\n \"port\": \"baremetal_0\",\n \"manage_fw\": true,\n \"manage_key_file\": true,\n \"key_file\": \"/etc/fence_xvm.key\",\n \"key_file_password\": \"abcdef\"\n }\n }\n ]\n }\n", "type": "json"}, "PacemakerLoggingSource": {"type": "json", "default": {"tag": "system.pacemaker", "file": "/var/log/host/pacemaker/pacemaker.log", "startmsg.regex": "^[a-zA-Z]{3} [0-9]{2} [:0-9]{8}"}}, "ContainerCli": {"type": "string", "default": "podman", "description": "CLI tool used to manage containers.", "constraints": [{"allowed_values": ["docker", "podman"]}]}, "EnableInstanceHA": {"default": false, "description": "Whether to enable an Instance HA configuration or not. This setup requires the Compute role to have the PacemakerRemote service added to it.", "type": "boolean"}, "PacemakerBundleOperationTimeout": {"type": "string", "default": "", "description": "The timeout for start, monitor and stop operations run by the container resource agent, in seconds. When left at the default '' and podman is not used, the timeout comes from pacemaker's default operation timeouts (20s). When left at the default and podman is used, the timeout is forced to 120s.", "constraints": [{"allowed_pattern": "([1-9][0-9]*s)?"}]}}, "parameter_groups": [{"label": "deprecated", "description": "The following parameters are deprecated and will be removed. They should not\nbe relied on for new deployments. 
If you have concerns regarding deprecated\nparameters, please contact the TripleO development team on IRC or the\nOpenStack mailing list.\n", "parameters": ["CorosyncIPv6"]}], "conditions": {"pcmk_tls_priorities_empty": {"equals": [{"get_param": "PacemakerTLSPriorities"}, ""]}, "pcmk_bundle_op_timeout_empty": {"equals": [{"get_param": "PacemakerBundleOperationTimeout"}, ""]}, "podman_enabled": {"equals": [{"get_param": "ContainerCli"}, "podman"]}, "is_ipv6": {"equals": [{"get_param": ["ServiceData", "net_ip_version_map", {"get_param": ["ServiceNetMap", "PacemakerNetwork"]}]}, 6]}}, "outputs": {"role_data": {"description": "Role data for the Pacemaker role.", "value": {"service_name": "pacemaker", "monitoring_subscription": {"get_param": "MonitoringSubscriptionPacemaker"}, "firewall_rules": {"130 pacemaker tcp": {"proto": "tcp", "dport": [2224, 3121, 21064]}, "131 pacemaker udp": {"proto": "udp", "dport": 5405}}, "config_settings": {"map_merge": [{"pacemaker::corosync::cluster_name": "tripleo_cluster", "pacemaker::corosync::manage_fw": false, "pacemaker::resource_defaults::defaults": {"resource-stickiness": {"value": "INFINITY"}}, "corosync_token_timeout": {"get_param": "CorosyncTokenTimeout"}, "pacemaker::corosync::settle_tries": {"get_param": "CorosyncSettleTries"}, "pacemaker::resource::bundle::deep_compare": true, "pacemaker::resource::ip::deep_compare": true, "pacemaker::resource::ocf::deep_compare": true, "corosync_ipv6": {"if": ["is_ipv6", true, false]}, "tripleo::fencing::config": {"get_param": "FencingConfig"}, "tripleo::fencing::deep_compare": true, "enable_fencing": {"get_param": "EnableFencing"}, "hacluster_pwd": {"get_param": "PcsdPassword"}, "tripleo::profile::base::pacemaker::remote_authkey": {"get_param": "PacemakerRemoteAuthkey"}, "tripleo::profile::base::pacemaker::pcsd_bind_addr": {"str_replace": {"template": "%{hiera('$NETWORK')}", "params": {"$NETWORK": {"get_param": ["ServiceNetMap", "PacemakerNetwork"]}}}}}, {"if": 
["pcmk_tls_priorities_empty", {}, {"tripleo::pacemaker::tls_priorities": {"get_param": "PacemakerTLSPriorities"}}]}, {"if": [{"and": ["pcmk_bundle_op_timeout_empty", {"not": "podman_enabled"}]}, {}, {"tripleo::profile::base::pacemaker::resource_op_defaults": {"bundle": {"name": "timeout", "value": {"if": ["pcmk_bundle_op_timeout_empty", "120s", {"get_param": "PacemakerBundleOperationTimeout"}]}}}}]}]}, "service_config_settings": {"rsyslog": {"tripleo_logging_sources_pacemaker": [{"get_param": "PacemakerLoggingSource"}]}}, "step_config": "include tripleo::profile::base::pacemaker\n", "host_prep_tasks": [{"name": "Make sure python3-novaclient is installed when IHA is enabled", "package": {"name": "python3-novaclient", "state": "present"}, "when": {"get_param": "EnableInstanceHA"}}, {"name": "Remove existing entries from logind conf", "ansible.builtin.lineinfile": {"path": "/etc/systemd/logind.conf", "regexp": "^\\s*#?\\s*HandlePowerKey\\s*=.*", "state": "absent"}}, {"name": "Make sure systemd-logind ignores power off", "ansible.builtin.lineinfile": {"path": "/etc/systemd/logind.conf", "regexp": "^#?HandlePowerKey", "line": "HandlePowerKey=ignore"}}, {"name": "Restart systemd-logind", "ansible.builtin.service": {"name": "systemd-logind", "state": "restarted"}}, {"name": "Gather service_facts on pacemaker_bootstrap_node", "ansible.builtin.service_facts": null, "when": ["{{(pacemaker_short_bootstrap_node_name|lower == ansible_facts['hostname']|lower)|bool}}", {"get_param": "EnableInstanceHA"}]}, {"name": "Check and eventually delete duplicate constraints (bootstrap node)", "become": true, "shell": "COUNT=$(cibadmin --query | xmllint --xpath '//rsc_location[@rsc=\"stonith-fence_compute-fence-nova\"]/@id' - |grep -oP '(?<=[\"])[^\"]*' -c)\nif [[ $COUNT > 1 ]]; then\n echo \"Detected $COUNT duplicate constraints, deleting them\"\n # assemble string with current list of computes\n GOOD=$(echo location-stonith-fence_compute-fence-nova-$(hiera 
pacemaker_remote_short_node_names |grep -oP '\".*?\"' |tr -d '\"'|tr -d '\\n' )--10000)\n # delete old constraints\n for i in $(cibadmin --query | xmllint --xpath '//rsc_location[@rsc=\"stonith-fence_compute-fence-nova\"]/@id' - |grep -oP '(?<=[\"])[^\"]*' |grep -v $GOOD ); do pcs constraint delete $i ; done\nelse\n echo \"No duplicate constraint found\"\nfi\n", "when": ["{{(pacemaker_short_bootstrap_node_name|lower == ansible_facts['hostname']|lower)|bool}}", {"get_param": "EnableInstanceHA"}, "ansible_facts['services']['pacemaker.service']['state'] == 'running'"]}], "scale_tasks": [{"when": ["step|int == 1", "container_cli == 'podman'", "\"pacemaker_remote\" in enabled_services|list", "inventory_hostname_short in compute_instanceha_short_node_names"], "tags": "down", "become": true, "block": [{"name": "Getting Nova compute hostname", "command": "crm_node -n", "register": "nova_compute_hostname"}, {"name": "Check if pacemaker_short_bootstrap_node_name is a defined variable", "when": "pacemaker_short_bootstrap_node_name is not defined", "fail": {"msg": "Cannot delegate pacemaker cleanup to the bootstrap node. 
Please delete the resources manually."}, "ignore_errors": true}, {"name": "Clean up Pacemaker remote and STONITH resources for Compute node", "when": ["nova_compute_hostname.stdout in compute_instanceha_short_node_names", "pacemaker_short_bootstrap_node_name is defined"], "block": [{"name": "List STONITH resource for the Compute node", "command": "stonith_admin -l {{ nova_compute_hostname.stdout }}", "register": "stonith_service_results", "delegate_to": "{{ pacemaker_short_bootstrap_node_name }}"}, {"name": "Disable the STONITH resources for the Compute node", "command": "pcs stonith disable \"{{ item }}\"", "loop": "{{ stonith_service_results.stdout | regex_findall('^(stonith-fence(?!_compute-fence-nova).*)', multiline=True)}}", "delegate_to": "{{ pacemaker_short_bootstrap_node_name }}", "register": "disable_output", "failed_when": "disable_output.rc != 0", "retries": 3, "delay": 5, "until": "disable_output.rc == 0"}, {"name": "Delete the STONITH resources for the Compute node", "command": "pcs stonith delete \"{{ item }}\"", "loop": "{{ stonith_service_results.stdout | regex_findall('^(stonith-fence(?!_compute-fence-nova).*)', multiline=True)}}", "delegate_to": "{{ pacemaker_short_bootstrap_node_name }}", "register": "delete_output", "failed_when": "delete_output.rc != 0", "retries": 3, "delay": 5, "until": "delete_output.rc == 0"}, {"name": "Clear the stonith level hierarchy for the Compute node target", "command": "pcs stonith level clear target {{ nova_compute_hostname.stdout }}", "delegate_to": "{{ pacemaker_short_bootstrap_node_name }}"}, {"name": "Delete Compute node from cluster", "command": "pcs cluster node remove-remote {{ nova_compute_hostname.stdout }}", "delegate_to": "{{ pacemaker_short_bootstrap_node_name }}"}, {"name": "Remove compute node in pcmk_host_list parameter from stonith-fence_compute-fence-nova", "command": "crm_resource --set-parameter=pcmk_host_list -r stonith-fence_compute-fence-nova -v \"{{ pacemaker_remote_short_node_names | 
reject('search', nova_compute_hostname.stdout) | join(',') }}\"", "delegate_to": "{{ pacemaker_short_bootstrap_node_name }}"}]}]}], "upgrade_tasks": [{"name": "Ensure redis is removed", "when": ["step|int == 5", "\"redis\" not in enabled_services|list", "{{ (pacemaker_short_bootstrap_node_name|lower == ansible_facts['hostname']|lower)|bool}}"], "become": true, "shell": "if crm_resource -r redis-bundle -q &>/dev/null; then\n pcs resource delete redis-bundle || true\n pcs resource delete ip-$(hiera redis_vip) || true\nfi\n"}, {"name": "Clean up cluster node cache", "when": ["step|int == 5", "\"redis\" not in enabled_services|list", "{{ (pacemaker_short_bootstrap_node_name|lower == ansible_facts['hostname']|lower)|bool}}"], "become": true, "shell": "pcs cluster node clear redis-bundle-0\npcs cluster node clear redis-bundle-1\npcs cluster node clear redis-bundle-2\ncrm_attribute --name redis_REPL_INFO --delete\n"}, {"name": "Clean up redis attribute", "when": ["step|int == 5", "\"redis\" not in enabled_services|list", "{{ (pacemaker_short_bootstrap_node_name|lower == ansible_facts['hostname']|lower)|bool}}"], "become": true, "shell": "pcs node attribute \"{{ item }}\" redis-role= || true\n", "loop": "{{ pacemaker_short_node_names }}"}], "external_upgrade_tasks": [{"when": ["step|int == 1"], "tags": ["never", "system_upgrade_stop_services", "system_upgrade_transfer_data"], "block": [{"name": "Stop cluster", "become": true, "shell": "set -eu\nFILE=/usr/sbin/pcs\nif test -f \"$FILE\"; then\n /usr/sbin/pcs cluster stop --force\nfi\n", "delegate_to": "{{ item }}", "with_items": "{{ groups['pacemaker'] | difference(groups['excluded_overcloud']) }}"}]}], "update_tasks": [{"name": "Check pacemaker cluster running before the minor update", "when": "step|int == 0", "pacemaker_cluster": "state=online check_and_fail=true", "async": 30, "poll": 4}, {"name": "Acquire the cluster shutdown lock to stop pacemaker cluster", "when": "step|int == 1", "command": "systemd-cat -t ha-shutdown 
/var/lib/container-config-scripts/pacemaker_mutex_shutdown.sh --acquire"}, {"name": "Stop pacemaker cluster", "when": "step|int == 1", "pacemaker_cluster": "state=offline"}, {"name": "Start pacemaker cluster", "when": "step|int == 4", "pacemaker_cluster": "state=online"}, {"name": "Release the cluster shutdown lock", "when": "step|int == 4", "command": "systemd-cat -t ha-shutdown /var/lib/container-config-scripts/pacemaker_mutex_shutdown.sh --release"}]}}}}