I'm running into an error with my high-availability software, Pacemaker, which I use together with DRBD. After configuring it, the crm_mon command gives me the following output:
Last updated: Fri Aug 26 10:57:32 2016 Last change: Fri Aug 26 17:33:11 2016 by root via cibadmin on node-primary
Stack: corosync
Current DC: node-primary (version 1.1.13-10.el7_2.4-44eb2dd) - partition with quorum
2 nodes and 3 resources configured
Online: [ node-primary node-secondary ]
virtual_ip (ocf::heartbeat:IPaddr2): Started node-primary
Master/Slave Set: drbd-data-sync [drbd-data]
Slaves: [ node-primary node-secondary ]
As you can see, both nodes are Slaves for the drbd-data resource, and despite my research I haven't found a solution to this issue.
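In case it helps, DRBD itself can be queried independently of Pacemaker; this is roughly how I check its view of the connection state and roles (a minimal sketch assuming DRBD 8.x, where /proc/drbd exists; testdata is the DRBD resource named in my logs):

    # DRBD's own view, independent of Pacemaker
    cat /proc/drbd            # global status of all DRBD devices
    drbdadm role testdata     # local/peer role of the resource
    drbdadm cstate testdata   # connection state (expect Connected)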
For your reference, here are the Pacemaker/Corosync logs:
/var/log/messages
Aug 26 10:58:06 node-primary lrmd[3731]: notice: drbd-data_monitor_60000:7464:stderr [ Error signing on to the CIB service: Transport endpoint is not connected ]
Aug 26 10:59:21 node-primary drbd(drbd-data)[7629]: ERROR: testdata: Called /usr/sbin/crm_master -Q -l reboot -v 10000
Aug 26 10:59:21 node-primary drbd(drbd-data)[7629]: ERROR: testdata: Exit code 107
Aug 26 10:59:21 node-primary drbd(drbd-data)[7629]: ERROR: testdata: Command output:
Aug 26 10:59:21 node-primary lrmd[3731]: notice: drbd-data_monitor_60000:7629:stderr [ Error signing on to the CIB service: Transport endpoint is not connected ]
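Exit code 107 appears to match errno ENOTCONN ("Transport endpoint is not connected"), so the resource agent seemingly cannot talk to the CIB at all. To reproduce the failing call by hand, this is my understanding of what the agent runs (crm_master derives the attribute name from OCF_RESOURCE_INSTANCE, which lrmd normally sets; exporting it manually is my assumption for testing outside the agent):

    # hypothetical manual reproduction of the agent's crm_master call
    export OCF_RESOURCE_INSTANCE=drbd-data
    /usr/sbin/crm_master -Q -l reboot -v 10000
    echo $?    # I would expect 107 here, matching the logged exit code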
/var/log/cluster/corosync.log
"Aug 26 10:59:06 [7653] node-primary.localdomain crm_node: error: get_local_nodeid: Could not get local node id from the CPG API: Access error (11)
drbd(drbd-data)[7629]: 2016/08/26_10:59:21 ERROR: testdata: Called /usr/sbin/crm_master -Q -l reboot -v 10000
drbd(drbd-data)[7629]: 2016/08/26_10:59:21 ERROR: testdata: Exit code 107
drbd(drbd-data)[7629]: 2016/08/26_10:59:21 ERROR: testdata: Command output:
Aug 26 10:59:21 [3731] node-primary.localdomain lrmd: notice: operation_finished: drbd-data_monitor_60000:7629:stderr [ Error signing on to the CIB service: Transport endpoint is not connected ]
Aug 26 11:00:21 [7855] node-primary.localdomain crm_node: error: get_local_nodeid: Could not get local node id from the CPG API: Access error (11)
drbd(drbd-data)[7831]: 2016/08/26_11:00:36 ERROR: testdata: Called /usr/sbin/crm_master -Q -l reboot -v 10000
drbd(drbd-data)[7831]: 2016/08/26_11:00:36 ERROR: testdata: Exit code 107
drbd(drbd-data)[7831]: 2016/08/26_11:00:36 ERROR: testdata: Command output:
Aug 26 11:00:36 [3731] node-primary.localdomain lrmd: notice: operation_finished: drbd-data_monitor_60000:7831:stderr [ Error signing on to the CIB service: Transport endpoint is not connected ]
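Given the CPG "Access error (11)" from crm_node, corosync membership seems worth ruling out; these are the standard checks I know of:

    # ring status and current membership, from the corosync toolset
    corosync-cfgtool -s
    corosync-cmapctl | grep members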
Here is the cluster CIB:
`<cib crm_feature_set="3.0.10" validate-with="pacemaker-2.3" epoch="12" num_updat es="14" admin_epoch="0" cib-last-written="Thu Sep 1 10:24:11 2016" update-origi n="node-secondary" update-client="cibadmin" update-user="root" have-quorum="1" d c-uuid="1">
<configuration>
<crm_config>
<cluster_property_set id="cib-bootstrap-options">
<nvpair id="cib-bootstrap-options-have-watchdog" name="have-watchdog" va lue="false"/>
<nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1 .1.13-10.el7_2.4-44eb2dd"/>
<nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster- infrastructure" value="corosync"/>
<nvpair id="cib-bootstrap-options-cluster-name" name="cluster-name" valu e="drbd_cluster"/>
<nvpair id="cib-bootstrap-options-stonith-enabled" name="stonith-enabled " value="false"/>
<nvpair id="cib-bootstrap-options-no-quorum-policy" name="no-quorum-poli cy" value="ignore"/>
</cluster_property_set>
</crm_config>
<nodes>
<node id="1" uname="node-primary"/>
<node id="2" uname="node-secondary"/>
</nodes>
<resources>
<primitive class="ocf" id="virt_ip" provider="heartbeat" type="IPaddr2">
<instance_attributes id="virt_ip-instance_attributes">
<nvpair id="virt_ip-instance_attributes-ip" name="ip" value="10.44.16. 59"/>
<nvpair id="virt_ip-instance_attributes-cidr_netmask" name="cidr_netma sk" value="21"/>
</instance_attributes>
<operations>
<op id="virt_ip-start-interval-0s" interval="0s" name="start" timeout= "20s"/>
<op id="virt_ip-stop-interval-0s" interval="0s" name="stop" timeout="2 0s"/>
<op id="virt_ip-monitor-interval-10s" interval="10s" name="monitor"/>
</operations>
</primitive>
<master id="drbd_data_clone">
<primitive class="ocf" id="drbd_data" provider="linbit" type="drbd">
<instance_attributes id="drbd_data-instance_attributes">
<nvpair id="drbd_data-instance_attributes-drbd_resource" name="drbd_ resource" value="testdata"/>
</instance_attributes>
<operations>
<op id="drbd_data-start-interval-0s" interval="0s" name="start" time out="240"/>
<op id="drbd_data-promote-interval-0s" interval="0s" name="promote" timeout="90"/>
<op id="drbd_data-demote-interval-0s" interval="0s" name="demote" ti meout="90"/>
<op id="drbd_data-stop-interval-0s" interval="0s" name="stop" timeou t="100"/>
<op id="drbd_data-monitor-interval-10s" interval="10s" name="monitor "/>
</operations>
</primitive>
<meta_attributes id="drbd_data_clone-meta_attributes">
<nvpair id="drbd_data_clone-meta_attributes-master_max" name="master_m ax" value="1"/>
<nvpair id="drbd_data_clone-meta_attributes-master-node-max" name="mas ter-node-max" value="1"/>
<nvpair id="drbd_data_clone-meta_attributes-clone-max" name="clone-max " value="2"/>
<nvpair id="drbd_data_clone-meta_attributes-clone-node-max" name="clon e-node-max" value="1"/>
<nvpair id="drbd_data_clone-meta_attributes-notify" name="notify" valu e="true"/>
</meta_attributes>
</master>
</resources>
<constraints/>
<rsc_defaults>
<meta_attributes id="rsc_defaults-options">
<nvpair id="rsc_defaults-options-resource-stickiness" name="resource-sti ckiness" value="100"/>
</meta_attributes>
</rsc_defaults>
</configuration>
<status>
<node_state id="1" uname="node-primary" in_ccm="true" crmd="online" crm-debu g-origin="do_update_resource" join="member" expected="member">
<transient_attributes id="1">
<instance_attributes id="status-1">
<nvpair id="status-1-shutdown" name="shutdown" value="0"/>
<nvpair id="status-1-probe_complete" name="probe_complete" value="true "/>
</instance_attributes>
</transient_attributes>
<lrm id="1">
<lrm_resources>
<lrm_resource id="virt_ip" type="IPaddr2" class="ocf" provider="heartb eat">
<lrm_rsc_op id="virt_ip_last_0" operation_key="virt_ip_start_0" oper ation="start" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.10" trans ition-key="7:7:0:877587e4-bfb4-47f7-8b57-56ade8f208ce" transition-magic="0:0;7:7 :0:877587e4-bfb4-47f7-8b57-56ade8f208ce" on_node="node-primary" call-id="6" rc-c ode="0" op-status="0" interval="0" last-run="1472739600" last-rc-change="1472739 600" exec-time="52" queue-time="0" op-digest="39b1ba53e0441cc0c307a4842f021a16"/ >
<lrm_rsc_op id="virt_ip_monitor_10000" operation_key="virt_ip_monito r_10000" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set ="3.0.10" transition-key="8:7:0:877587e4-bfb4-47f7-8b57-56ade8f208ce" transition -magic="0:0;8:7:0:877587e4-bfb4-47f7-8b57-56ade8f208ce" on_node="node-primary" c all-id="7" rc-code="0" op-status="0" interval="10000" last-rc-change="1472739600 " exec-time="34" queue-time="0" op-digest="cb43afd49faa0fa13e9a578ba0eb96f6"/>
</lrm_resource>
<lrm_resource id="drbd_data" type="drbd" class="ocf" provider="linbit" >
<lrm_rsc_op id="drbd_data_last_failure_0" operation_key="drbd_data_m onitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_ set="3.0.10" transition-key="5:9:7:877587e4-bfb4-47f7-8b57-56ade8f208ce" transit ion-magic="0:8;5:9:7:877587e4-bfb4-47f7-8b57-56ade8f208ce" on_node="node-primary " call-id="12" rc-code="8" op-status="0" interval="0" last-run="1472739851" last -rc-change="1472739851" exec-time="15085" queue-time="0" op-digest="cb1c1281eec7 e6bf06e0cd1385ce89d9"/>
<lrm_rsc_op id="drbd_data_last_0" operation_key="drbd_data_demote_0" operation="demote" crm-debug-origin="do_update_resource" crm_feature_set="3.0.1 0" transition-key="9:10:0:877587e4-bfb4-47f7-8b57-56ade8f208ce" transition-magic ="0:0;9:10:0:877587e4-bfb4-47f7-8b57-56ade8f208ce" on_node="node-primary" call-i d="14" rc-code="0" op-status="0" interval="0" last-run="1472739866" last-rc-chan ge="1472739866" exec-time="36" queue-time="0" op-digest="cb1c1281eec7e6bf06e0cd1 385ce89d9"/>
<lrm_rsc_op id="drbd_data_monitor_10000" operation_key="drbd_data_mo nitor_10000" operation="monitor" crm-debug-origin="do_update_resource" crm_featu re_set="3.0.10" transition-key="11:10:0:877587e4-bfb4-47f7-8b57-56ade8f208ce" tr ansition-magic="0:0;11:10:0:877587e4-bfb4-47f7-8b57-56ade8f208ce" on_node="node- primary" call-id="16" rc-code="0" op-status="0" interval="10000" last-rc-change= "1472739881" exec-time="15058" queue-time="0" op-digest="a272c0f296d99478464e690 82b608c2c"/>
</lrm_resource>
</lrm_resources>
</lrm>
</node_state>
<node_state id="2" uname="node-secondary" crmd="online" crm-debug-origin="do _update_resource" in_ccm="true" join="member" expected="member">
<transient_attributes id="2">
<instance_attributes id="status-2">
<nvpair id="status-2-shutdown" name="shutdown" value="0"/>
<nvpair id="status-2-probe_complete" name="probe_complete" value="true "/>
</instance_attributes>
</transient_attributes>
<lrm id="2">
<lrm_resources>
<lrm_resource id="virt_ip" type="IPaddr2" class="ocf" provider="heartb eat">
<lrm_rsc_op id="virt_ip_last_0" operation_key="virt_ip_monitor_0" op eration="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.10" t ransition-key="6:7:7:877587e4-bfb4-47f7-8b57-56ade8f208ce" transition-magic="0:7 ;6:7:7:877587e4-bfb4-47f7-8b57-56ade8f208ce" on_node="node-secondary" call-id="5 " rc-code="7" op-status="0" interval="0" last-run="1472743200" last-rc-change="1 472743200" exec-time="49" queue-time="0" op-digest="39b1ba53e0441cc0c307a4842f02 1a16"/>
</lrm_resource>
<lrm_resource id="drbd_data" type="drbd" class="ocf" provider="linbit" >
<lrm_rsc_op id="drbd_data_last_failure_0" operation_key="drbd_data_m onitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_ set="3.0.10" transition-key="7:9:7:877587e4-bfb4-47f7-8b57-56ade8f208ce" transit ion-magic="0:0;7:9:7:877587e4-bfb4-47f7-8b57-56ade8f208ce" on_node="node-seconda ry" call-id="10" rc-code="0" op-status="0" interval="0" last-run="1472743451" la st-rc-change="1472743451" exec-time="15058" queue-time="0" op-digest="cb1c1281ee c7e6bf06e0cd1385ce89d9"/>
<lrm_rsc_op id="drbd_data_last_0" operation_key="drbd_data_monitor_0 " operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.0 .10" transition-key="7:9:7:877587e4-bfb4-47f7-8b57-56ade8f208ce" transition-magi c="0:0;7:9:7:877587e4-bfb4-47f7-8b57-56ade8f208ce" on_node="node-secondary" call -id="10" rc-code="0" op-status="0" interval="0" last-run="1472743451" last-rc-ch ange="1472743451" exec-time="15058" queue-time="0" op-digest="cb1c1281eec7e6bf06 e0cd1385ce89d9"/>
<lrm_rsc_op id="drbd_data_monitor_10000" operation_key="drbd_data_mo nitor_10000" operation="monitor" crm-debug-origin="do_update_resource" crm_featu re_set="3.0.10" transition-key="14:10:0:877587e4-bfb4-47f7-8b57-56ade8f208ce" tr ansition-magic="0:0;14:10:0:877587e4-bfb4-47f7-8b57-56ade8f208ce" on_node="node- secondary" call-id="13" rc-code="0" op-status="0" interval="10000" last-rc-chang e="1472743482" exec-time="15054" queue-time="0" op-digest="a272c0f296d99478464e6 9082b608c2c"/>
</lrm_resource>
</lrm_resources>
</lrm>
</node_state>
</status>
</cib>
and here is the corosync.conf :) :
totem {
    version: 2
    secauth: off
    cluster_name: drbd_cluster
    transport: udpu
}

nodelist {
    node {
        ring0_addr: node-primary
        nodeid: 1
    }
    node {
        ring0_addr: node-secondary
        nodeid: 2
    }
}

quorum {
    provider: corosync_votequorum
    two_node: 1
}

logging {
    to_logfile: yes
    logfile: /var/log/cluster/corosync.log
    to_syslog: yes
}
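For completeness, quorum state can be checked with corosync-quorumtool; as I understand votequorum's two-node mode, with two_node: 1 the cluster should stay quorate even when only one node is up:

    # current quorum/votequorum state
    corosync-quorumtool -s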
Can you help me fix this issue?
Thanks in advance for your help,
Geoffrey