MySQL+Keepalived双主互备架构搭架
配置前进行校时操作
目前创新互联已为上千的企业提供了网站建设、域名、网站空间、网站托管、企业网站设计、宜黄网站维护等服务,公司将坚持客户导向、应用为本的策略,正道将秉承"和谐、参与、激情"的文化,与客户和合作伙伴齐心协力一起成长,共同发展。
#安装ntpdate工具
yum install ntpdate -y
#使用ntpdate校时(后面的是ntp服务器)
ntpdate pool.ntp.org
0.测试mysql安装
yum install mariadb mariadb-server -y
systemctl restart mariadb
systemctl enable mariadb
mysql_secure_installation
1.DB1修改配置文件(需重启)
vim /etc/my.cnf
#在[mysqld]添加
server-id=166
#开启mysql日志功能
log-bin=mysql-bin
#定义日志命名格式
relay-log=mysql-relay-bin
#以下table复制过滤
#replicate-wild-ignore-table=test.%
#replicate-wild-ignore-table=mysql.%
#replicate-wild-ignore-table=performance_schema.%
2.DB2修改配置文件(需重启)
vim /etc/my.cnf
#在[mysqld]添加
server-id=168
#开启mysql日志功能
log-bin=mysql-bin
#定义日志命名格式
relay-log=mysql-relay-bin
3.DB1,DB2分别创建复制帐号
mysql -u root -p
#创建用户slave_up允许从192.168.1网段登录
create user 'slave_cp'@'192.168.1.%' identified by 'pass';
grant replication slave on *.* to 'slave_cp'@'192.168.1.%';
exit
4.DB1获取二进制日志信息
mysql -u root -p
#对数据库进行只读锁定(防止查看二进制日志同时有人对数据库修改操作)
flush tables with read lock;
#查询主机二进制文件信息
show master status;
+------------------+----------+--------------+------------------+
| File | Position | Binlog_Do_DB | Binlog_Ignore_DB |
+------------------+----------+--------------+------------------+
| mysql-bin.000001 | 494 | | |
+------------------+----------+--------------+------------------+
1 row in set (0.00 sec)
#解除只读锁定
unlock tables;
exit
5.同步数据库
#DB1
#备份主数据库,并上传给从数据库
mysqldump -uroot -p123456 --all-databases --lock-all-tables >/tmp/dbdump.sql
scp /tmp/dbdump.sql 192.168.1.168:/tmp/
#DB2
#从服务器导入主服务器数据库
mysql -uroot -p
6.在DB2上将DB1设置为主服务器
#开启mysql防火墙端口
firewall-cmd --add-port=3306/tcp --permanent
firewall-cmd --reload
change master to
master_host='192.168.1.168' ,
master_user='slave_cp',
master_password='pass',
master_log_file='mysql-bin.000001',
master_log_pos=494;
#启动slave
start slave;
#查看是否正常工作
show slave status\G
exit
7.在DB2获取二进制日志信息
mysql -u root -p
#对数据库进行只读锁定(防止查看二进制日志同时有人对数据库修改操作)
flush tables with read lock;
#查询主机二进制文件信息
show master status;
+------------------+----------+--------------+------------------+
| File | Position | Binlog_Do_DB | Binlog_Ignore_DB |
+------------------+----------+--------------+------------------+
| mysql-bin.000001 | 513721| | |
+------------------+----------+--------------+------------------+
1 row in set (0.00 sec)
#解除只读锁定
unlock tables;
exit
8.在DB1将DB2设置为主服务器
change master to
master_host='192.168.1.168' ,
master_user='slave_cp',
master_password='pass',
master_log_file='mysql-bin.000001',
master_log_pos=513721;
#启动slave
start slave;
#查看是否正常工作
show slave status\G
exit
1.keepalived配置文件配置
! Configuration File for keepalived
global_defs {
#设置报警通知邮件地址,可以设置多个
notification_email {
msun1996@163.com
}
#设置邮件的发送地址
notification_email_from keepalived@msun.com
#设置smtp server的地址,该地址必须是存在的
smtp_server 127.0.0.1
#设置连接smtp server的超时时间
smtp_connect_timeout 30
#运行Keepalived服务器的标识,发邮件时显示在邮件标题中的信息
router_id mysql_msun
}
#检测haproxy脚本
vrrp_script chk_mysql {
script "/etc/keepalived/mysqlcheck/check_slave.sh"
interval 2
weight 2
}
#定义VRRP实例,实例名自定义
vrrp_instance mysql_msun {
#指定Keepalived的角色,MASTER主机 BACKUP备份
state BACKUP #此处两个都设置为BACKUP
#指定HA监测的接口
interface eno16777736
#虚拟路由标识,这个标识是一个数字(1-255),在一个VRRP实例中主备服务器ID必须一样
virtual_router_id 68
#优先级,数字越大优先级越高,在一个实例中主服务器优先级要高于备服务器
priority 100 #从服务器99
#设置主备之间同步检查的时间间隔单位秒
advert_int 1
#设置不抢占模式(DB1设置即可)
nopreempt
#设置验证类型和密码
authentication {
#验证类型有两种{PASS|HA}
auth_type PASS
#设置验证密码,在一个实例中主备密码保持一样
auth_pass 1689
}
track_script {
chk_mysql # 执行监控的服务
}
#定义虚拟IP地址,可以有多个,每行一个
virtual_ipaddress {
192.168.1.160
}
}
2.创建监控脚本(/etc/keepalived/mysqlcheck/check_slave.sh)
#!/bin/bash
#This scripts is check for Mysql Slave status
Mysqlbin=/usr/bin/mysql
user=root
pw='123456'
port=3306
host=127.0.0.1
#最大延时
sbm=120
#Check for $Mysqlbin
if [ ! -f $Mysqlbin ];then
echo 'Mysqlbin not found,check the variable Mysqlbin'
exit 99
fi
#Get Mysql Slave Status
IOThread=`$Mysqlbin -h $host -P $port -u$user -p$pw -e 'show slave status\G' 2>/dev/null|grep 'Slave_IO_Running:'|awk '{print $NF}'`
SQLThread=`$Mysqlbin -h $host -P $port -u$user -p$pw -e 'show slave status\G' 2>/dev/null|grep 'Slave_SQL_Running:'|awk '{print $NF}'`
SBM=`$Mysqlbin -h $host -P $port -u$user -p$pw -e 'show slave status\G' 2>/dev/null|grep 'Seconds_Behind_Master:'|awk '{print $NF}'`
#Check if the mysql run
if [[ -z "$IOThread" ]];then
exit 1
fi
#Check if the thread run
if [[ "$IOThread" == "No" || "$SQLThread" == "No" ]];then
exit 1
elif [[ $SBM -ge $sbm ]];then
exit 1
else
exit 0
fi
3.防火墙策略
#运行VPPR组播(keepalived)
firewall-cmd --permanent --add-rich='rule family="ipv4" destination address="224.0.0.18" protocol value="vrrp" accept'
firewall-cmd --reload
4.测试
1.正常启动DB1、DB2信息
[root@DB1 ~]# ip a
1: lo: mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno16777736: mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:43:30:13 brd ff:ff:ff:ff:ff:ff
inet 192.168.1.166/24 brd 192.168.1.255 scope global eno16777736
valid_lft forever preferred_lft forever
inet 192.168.1.160/32 scope global eno16777736
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fe43:3013/64 scope link
valid_lft forever preferred_lft forever
[root@DB2 ~]# ip a
1: lo: mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno16777736: mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:da:7d:9d brd ff:ff:ff:ff:ff:ff
inet 192.168.1.168/24 brd 192.168.1.255 scope global eno16777736
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:feda:7d9d/64 scope link
valid_lft forever preferred_lft forever
2.关闭DB1数据库,观察IP
[root@DB1 ~]# systemctl stop mariadb
[root@DB1 ~]# ip a
1: lo: mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno16777736: mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:43:30:13 brd ff:ff:ff:ff:ff:ff
inet 192.168.1.166/24 brd 192.168.1.255 scope global eno16777736
valid_lft forever preferred_lft forever
inet6 2001:da8:4002:3301:20c:29ff:fe43:3013/64 scope global noprefixroute dynamic
valid_lft 2591817sec preferred_lft 604617sec
inet6 fe80::20c:29ff:fe43:3013/64 scope link
valid_lft forever preferred_lft forever
[root@DB2 ~]# ip a
1: lo: mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno16777736: mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:da:7d:9d brd ff:ff:ff:ff:ff:ff
inet 192.168.1.168/24 brd 192.168.1.255 scope global eno16777736
valid_lft forever preferred_lft forever
inet 192.168.1.160/32 scope global eno16777736
valid_lft forever preferred_lft forever
inet6 2001:da8:4002:3301:20c:29ff:feda:7d9d/64 scope global noprefixroute dynamic
valid_lft 2591669sec preferred_lft 604469sec
inet6 fe80::20c:29ff:feda:7d9d/64 scope link
valid_lft forever preferred_lft forever
3.重启主机mariadb,观察IP
[root@DB1 ~]# ip a #DB1不切换成主机(因为设置为不抢占模式)
1: lo: mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno16777736: mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:43:30:13 brd ff:ff:ff:ff:ff:ff
inet 192.168.1.166/24 brd 192.168.1.255 scope global eno16777736
valid_lft forever preferred_lft forever
inet6 2001:da8:4002:3301:20c:29ff:fe43:3013/64 scope global noprefixroute dynamic
valid_lft 2591605sec preferred_lft 604405sec
inet6 fe80::20c:29ff:fe43:3013/64 scope link
valid_lft forever preferred_lft forever
[root@DB2 ~]# ip a
1: lo: mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno16777736: mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:da:7d:9d brd ff:ff:ff:ff:ff:ff
inet 192.168.1.168/24 brd 192.168.1.255 scope global eno16777736
valid_lft forever preferred_lft forever
inet 192.168.1.160/32 scope global eno16777736
valid_lft forever preferred_lft forever
inet6 2001:da8:4002:3301:20c:29ff:feda:7d9d/64 scope global noprefixroute dynamic
valid_lft 2591533sec preferred_lft 604333sec
inet6 fe80::20c:29ff:feda:7d9d/64 scope link
valid_lft forever preferred_lft forever
4.再次关闭DB2的maraiadb,问题出现了,预想当DB2宕机,IP未能正常漂移!!!
[root@DB2 ~]# systemctl stop mariadb
[root@DB2 ~]# ip a
1: lo: mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno16777736: mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:da:7d:9d brd ff:ff:ff:ff:ff:ff
inet 192.168.1.168/24 brd 192.168.1.255 scope global eno16777736
valid_lft forever preferred_lft forever
inet 192.168.1.160/32 scope global eno16777736
valid_lft forever preferred_lft forever
inet6 2001:da8:4002:3301:20c:29ff:feda:7d9d/64 scope global noprefixroute dynamic
valid_lft 2591326sec preferred_lft 604126sec
inet6 fe80::20c:29ff:feda:7d9d/64 scope link
valid_lft forever preferred_lft forever
[root@DB1 ~]# ip a
1: lo: mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno16777736: mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 00:0c:29:43:30:13 brd ff:ff:ff:ff:ff:ff
inet 192.168.1.166/24 brd 192.168.1.255 scope global eno16777736
valid_lft forever preferred_lft forever
inet6 2001:da8:4002:3301:20c:29ff:fe43:3013/64 scope global noprefixroute dynamic
valid_lft 2591241sec preferred_lft 604041sec
inet6 fe80::20c:29ff:fe43:3013/64 scope link
valid_lft forever preferred_lft forever
关于这个问题的思考:
先说说本案例中采用的是Keepalived+mysql的组合,Keepalived用了双备模式,采用脚本检测应用是否正常来决定keepalived的权重,从而让正常的数据库能得到VIP而正常工作,考虑到了如果DB1宕掉(仅mysql服务)(DB1的权重会低于DB2),DB2接管服务,而DB1再次正常时(权重又会高于DB2),为了避免数据库来回切换,而在DB1的keepalived配置中设置了不抢占模式来避免此问题。但是,导致了一个新的问题,就算是DB2的数据库不正常工作,(权重更低于DB2),但因为DB1不抢占模式,DB2仍然会占有VIP,(除非是Keepalived服务宕掉)。
关于这个问题的思考解决方案:
1.keepalived取消使用不抢占模式,从而使主机切换正常,但这样也就使DB1的数据库服务宕掉再正常启动时,会抢占DB2的VIP,使数据库进行不必要的切换。
2.修改检测脚本,keepalived使用检测脚本正常时返回0,不正常时,直接让keepalived服务本身宕掉。这样情况下,DB1服务mysql服务再次正常(并启动keepalived),不会抢占DB2的VIP;当DB2mysql服务不正常工作时,(会自动停止keepalived),从而使DB1正常得到VIP。
#!/bin/bash
#This scripts is check for Mysql Slave status
Mysqlbin=/usr/bin/mysql
user=root
pw='123456'
port=3306
host=127.0.0.1
#最大延时
sbm=120
#Check for $Mysqlbin
if [ ! -f $Mysqlbin ];then
echo 'Mysqlbin not found,check the variable Mysqlbin'
pkill keepalived
fi
#Get Mysql Slave Status
IOThread=`$Mysqlbin -h $host -P $port -u$user -p$pw -e 'show slave status\G' 2>/dev/null|grep 'Slave_IO_Running:'|awk '{print $NF}'`
SQLThread=`$Mysqlbin -h $host -P $port -u$user -p$pw -e 'show slave status\G' 2>/dev/null|grep 'Slave_SQL_Running:'|awk '{print $NF}'`
SBM=`$Mysqlbin -h $host -P $port -u$user -p$pw -e 'show slave status\G' 2>/dev/null|grep 'Seconds_Behind_Master:'|awk '{print $NF}'`
#Check if the mysql run
if [[ -z "$IOThread" ]];then
pkill keepalived
fi
#Check if the thread run
if [[ "$IOThread" == "No" || "$SQLThread" == "No" ]];then
pkill keepalived
elif [[ $SBM -ge $sbm ]];then
pkill keepalived
else
exit 0
fi