MHA主库检查参数
MHA从0.53版本开始支持ping_type参数来设置如何检查master可用性:
ping_type=select: 基于一个到master的已经存在的连接执行select 1,连接被重复使用,select检查能快速返回结果,但检查过于简单,无法发现更多故障。
ping_type=connect: 每次检查时先创建连接,执行select 1后再断开连接,能更严格、更快速地发现TCP连接级别的故障。
ping_type=insert: 基于一个到master的已经存在的连接执行insert语句,连接被重复使用,能更好地检测到数据库因磁盘空间耗尽或磁盘IO资源耗尽导致的故障。该检查方式在0.56版本中引入。
MHA使用ping_interval参数设置MHA Manager探测主库故障的时间间隔,默认间隔为3秒;连续4次ping失败后,即判定master节点发生故障。
MHA主库检查脚本
ping_select代码(/lib/MHA/HealthCheck.pm)
# Health-check the master by running "SELECT 1 As Value" on the connection
# already stored in $self->{dbh}.
#
# Side effect: switches RaiseError on for that handle so DBI failures are
# thrown and caught by the eval below.
#
# Returns 0 when the query yields a row whose Value column equals 1, and 1 on
# any failure. On failure a warning is sent to $self->{logger} when one is set.
sub ping_select($) {
    my $self = shift;
    my $log  = $self->{logger};
    my $dbh  = $self->{dbh};

    eval {
        $dbh->{RaiseError} = 1;
        my $sth = $dbh->prepare("SELECT 1 As Value");
        $sth->execute();
        my $href = $sth->fetchrow_hashref;
        if (   !defined($href)
            || !defined( $href->{Value} )
            || $href->{Value} != 1 )
        {
            # Give the failure a reason; a bare die would only say "Died".
            die "unexpected result from SELECT 1\n";
        }
    };

    # Copy $@ right away: it is a global that any later eval may overwrite.
    if ( my $error = $@ ) {
        undef $@;
        my $msg = "Got error on MySQL select ping: ";
        $msg .= $DBI::err         if ($DBI::err);
        $msg .= " ($DBI::errstr)" if ($DBI::errstr);

        # When DBI has nothing to report (bad result set, unusable handle),
        # fall back to the exception text so the warning is not left empty.
        if ( !$DBI::err && !$DBI::errstr ) {
            chomp( my $reason = "$error" );
            $msg .= $reason;
        }
        $log->warning($msg) if ($log);
        return 1;
    }
    return 0;
}
ping_connect代码(/lib/MHA/HealthCheck.pm)
# Connect-type health check: make sure a connection exists, run the select
# ping on it, then wait and disconnect.
#
# Steps, all inside one eval so that any exception is reported as a failure:
#   1. While $self->{dbh} is unset, call $self->connect() up to two times.
#      An exception from the final attempt is re-thrown; an attempt that
#      fails without throwing simply falls through to the next step.
#   2. Run $self->ping_select() and take its return value as the result.
#   3. Call $self->sleep_until() with the start time and interval - 1.5
#      (per the original note: to hold the advisory lock for a while), then
#      $self->disconnect_if().
#
# Returns 2 whenever $self->{_already_monitored} is true; otherwise the
# ping_select() result, or 1 if an exception was caught.
sub ping_connect($) {
    my $self         = shift;
    my $logger       = $self->{logger};
    my $status       = 1;
    my $last_attempt = 2;

    eval {
        my $started_at = [gettimeofday];

        for my $attempt ( 1 .. $last_attempt ) {
            last if $self->{dbh};
            eval { $status = $self->connect( 1, $self->{interval}, 0, 0, 1 ); };

            # Nothing to escalate if we are connected or nothing was thrown.
            next if $self->{dbh} || !$@;
            die $@ if $attempt == $last_attempt;
        }

        $status = $self->ping_select();

        # Keep the connection (and its advisory lock) around for a while
        # before dropping it.
        $self->sleep_until( $started_at, $self->{interval} - 1.5 );
        $self->disconnect_if();
    };

    if ($@) {
        my $warning = "Got error on MySQL connect ping: $@";
        undef $@;
        if ($DBI::err) {
            $warning .= $DBI::err;
        }
        if ($DBI::errstr) {
            $warning .= " ($DBI::errstr)";
        }
        $logger->warning($warning) if ($logger);
        $status = 1;
    }

    return $self->{_already_monitored} ? 2 : $status;
}
ping_insert代码(/lib/MHA/HealthCheck.pm)
# Health-check the master by writing through the connection already stored in
# $self->{dbh}: make sure the infra database and the infra.chk_masterha table
# exist, then upsert row key=1 with the current unix timestamp.
#
# Side effect: switches RaiseError on for that handle so DBI failures are
# thrown and caught by the eval below.
#
# Returns 0 when all three statements succeed, and 1 on any failure. On
# failure a warning is sent to $self->{logger} when one is set.
sub ping_insert($) {
    my $self = shift;
    my $log  = $self->{logger};
    my $dbh  = $self->{dbh};

    eval {
        $dbh->{RaiseError} = 1;
        $dbh->do("CREATE DATABASE IF NOT EXISTS infra");
        $dbh->do(
"CREATE TABLE IF NOT EXISTS infra.chk_masterha (`key` tinyint NOT NULL primary key,`val` int(10) unsigned NOT NULL DEFAULT '0') engine=MyISAM"
        );
        $dbh->do(
"INSERT INTO infra.chk_masterha values (1,unix_timestamp()) ON DUPLICATE KEY UPDATE val=unix_timestamp()"
        );
    };

    # Copy $@ right away: it is a global that any later eval may overwrite.
    if ( my $error = $@ ) {
        undef $@;
        my $msg = "Got error on MySQL insert ping: ";
        $msg .= $DBI::err         if ($DBI::err);
        $msg .= " ($DBI::errstr)" if ($DBI::errstr);

        # When DBI has nothing to report (for example an unusable handle),
        # fall back to the exception text so the warning is not left empty.
        if ( !$DBI::err && !$DBI::errstr ) {
            chomp( my $reason = "$error" );
            $msg .= $reason;
        }
        $log->warning($msg) if ($log);
        return 1;
    }
    return 0;
}
连续4次检查失败后,还需通过is_secondary_down()二次确认,才判定主库发生故障
# Excerpt from a larger loop that is not visible here. Runs once the failure
# counter has reached four.
if ( $error_count >= 4 ) {
# Record the result of the SSH reachability check.
$ssh_reachable = $self->is_ssh_reachable();
# Flag the master as down only when is_secondary_down() also returns true.
$master_is_down = 1 if ( $self->is_secondary_down() );
# Leave the enclosing loop once the master has been flagged as down.
last if ($master_is_down);
# Otherwise start counting failures from zero again.
$error_count = 0;
}