在上一篇博客Replication[1]中我们构建了一个简单的主从复制架构的redis服务,利用Replication我们可以让redis提供读写分离、提升读取能力、数据备份等功能,也支持slave从故障中恢复。但是如果master出现了故障,那么整个Replication都会处于一个不可用的状态。这显然无法满足高可用的目标。而这个目标需要借助sentinel[2]来实现。
sentinel概述:
- 监控:sentinel是一个分布式系统,多个实例利用gossip协议协同工作。sentinel监控master和slave实例,同时sentinel实例之间也互相监控。
- 通知:当某一个实例出现问题时,sentinel可以通过API通知系统管理员。
- 自动故障转移:如果被监控的master出现问题,sentinel可以启动一个自动故障转移的过程,sentinel实例之间选举出来一个slave提升为master,然后配置其他slave的配置使其成为新的master的slave,并且通知client使用新的连接地址。
- 配置提供者:client不再直接连接到master或者slave,而是连接到sentinel,由sentinel提供redis的master和slave的地址。
1 搭建环境
运行sentinel有两种方式:
- redis-sentinel /path/to/sentinel.conf
- redis-server /path/to/sentinel.conf --sentinel
两种方式是完全一样的(通常redis-sentinel文件是redis-server的一个符号链接,redis-server启动时会做如下检查):
/* Returns 1 if there is --sentinel among the arguments or if
 * argv[0] contains "redis-sentinel". Returns 0 otherwise. */
int checkForSentinelMode(int argc, char **argv) {
    int j;

    /* The program name itself (e.g. a "redis-sentinel" symlink) selects
     * sentinel mode, without needing any flag. */
    if (strstr(argv[0],"redis-sentinel") != NULL) return 1;
    /* Otherwise look for an explicit --sentinel flag; argv[0] is skipped. */
    for (j = 1; j < argc; j++)
        if (!strcmp(argv[j],"--sentinel")) return 1;
    return 0;
}
与redis-server默认的6379端口号不同的是,redis-sentinel默认运行在26379端口。
这里使用docker-compose -f redis.yml up -d启动一个主从复制的环境,包含一个master、两个slave和三个sentinel。
# https://docs.docker.com/compose/compose-file/compose-file-v3/
# Replication + Sentinel test environment: one master, two replicas and
# three sentinels.
version: '3'
services:
  # The single master instance.
  master.test:
    image: redis:6.2
    restart: on-failure
    command: redis-server
    volumes:
      - volume_master:/data
  # First replica, following master.test:6379.
  slave1.test:
    image: redis:6.2
    restart: on-failure
    command: redis-server --slaveof master.test 6379
    depends_on:
      - master.test
    volumes:
      - volume_slave1:/data
  # Second replica, following master.test:6379.
  slave2.test:
    image: redis:6.2
    restart: on-failure
    command: redis-server --slaveof master.test 6379
    depends_on:
      - master.test
    volumes:
      - volume_slave2:/data
  # Sentinels are built from the Dockerfile in the current directory and
  # started in sentinel mode with /etc/redis/sentinel.conf.
  sentinel1.test:
    build: .
    restart: on-failure
    command: redis-server /etc/redis/sentinel.conf --sentinel
    depends_on:
      - master.test
      - slave1.test
      - slave2.test
  sentinel2.test:
    build: .
    restart: on-failure
    command: redis-server /etc/redis/sentinel.conf --sentinel
    depends_on:
      - master.test
      - slave1.test
      - slave2.test
  sentinel3.test:
    build: .
    restart: on-failure
    command: redis-server /etc/redis/sentinel.conf --sentinel
    depends_on:
      - master.test
      - slave1.test
      - slave2.test
# Named volumes mounted at /data by the master and the two replicas.
volumes:
  volume_master:
  volume_slave1:
  volume_slave2:
# https://hub.docker.com/_/redis/
# https://github.com/docker-library/redis/blob/master/6.2/Dockerfile
# Sentinel image: the stock redis:6.2 image plus the sentinel configuration.
FROM redis:6.2
# Install the configuration file, owned by the redis user and group.
COPY --chown=redis:redis sentinel.conf /etc/redis/sentinel.conf
# Port the sentinel listens on (the `port` value in sentinel.conf).
EXPOSE 26379
# https://github.com/redis/redis/blob/6.2/sentinel.conf
protected-mode no
# Port this sentinel listens on.
port 26379
# Do not run as a daemon.
daemonize no
# sentinel monitor <master-name> <ip> <redis-port> <quorum>
# Monitor the master named "master1" at master.test:6379 with a quorum of 2.
sentinel monitor master1 master.test 6379 2
# sentinel down-after-milliseconds <master-name> <milliseconds>
sentinel down-after-milliseconds master1 30000
# sentinel failover-timeout <master-name> <milliseconds>
sentinel failover-timeout master1 180000
# sentinel parallel-syncs <master-name> <numreplicas>
sentinel parallel-syncs master1 1
# Hostname handling; the monitored master above is given as a hostname
# (master.test) rather than an IP address.
SENTINEL resolve-hostnames yes
SENTINEL announce-hostnames no
启动后进入到其中的一个sentinel中(docker exec -it sentinel_sentinel1.test_1 redis-cli -p 26379):
# 查看启动后的redis主从复制+Sentinel的容器
docker-compose -f redis.yml ps
Name Command State Ports
----------------------------------------------------------------------------------------
sentinel_master.test_1 docker-entrypoint.sh redis ... Up 6379/tcp
sentinel_slave1.test_1 docker-entrypoint.sh redis ... Up 6379/tcp
sentinel_slave2.test_1 docker-entrypoint.sh redis ... Up 6379/tcp
sentinel_sentinel1.test_1 docker-entrypoint.sh redis ... Up 26379/tcp, 6379/tcp
sentinel_sentinel2.test_1 docker-entrypoint.sh redis ... Up 26379/tcp, 6379/tcp
sentinel_sentinel3.test_1 docker-entrypoint.sh redis ... Up 26379/tcp, 6379/tcp
# 进入其中一个sentinel
docker exec -it sentinel_sentinel1.test_1 redis-cli -p 26379
127.0.0.1:26379> SENTINEL master master1
1) "name"
2) "master1"
3) "ip"
4) "172.20.0.2"
5) "port"
6) "6379"
7) "runid"
8) "f12d1ec60110762a0c8d04e1a41e5a35475cd180"
9) "flags"
10) "master"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "954"
19) "last-ping-reply"
20) "954"
21) "down-after-milliseconds"
22) "30000"
23) "info-refresh"
24) "1008"
25) "role-reported"
26) "master"
27) "role-reported-time"
28) "231902"
29) "config-epoch"
30) "0"
31) "num-slaves"
32) "2"
33) "num-other-sentinels"
34) "2"
35) "quorum"
36) "2"
37) "failover-timeout"
38) "180000"
39) "parallel-syncs"
40) "1"
127.0.0.1:26379> SENTINEL replicas master1
1) 1) "name"
2) "172.20.0.3:6379"
3) "ip"
4) "172.20.0.3"
5) "port"
6) "6379"
7) "runid"
8) "1af97ddae7eccd5fca707b202188498d990d8cb8"
9) "flags"
10) "slave"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "11"
19) "last-ping-reply"
20) "11"
21) "down-after-milliseconds"
22) "30000"
23) "info-refresh"
24) "7516"
25) "role-reported"
26) "slave"
27) "role-reported-time"
28) "278534"
29) "master-link-down-time"
30) "0"
31) "master-link-status"
32) "ok"
33) "master-host"
34) "master.test"
35) "master-port"
36) "6379"
37) "slave-priority"
38) "100"
39) "slave-repl-offset"
40) "54607"
2) 1) "name"
2) "172.20.0.4:6379"
3) "ip"
4) "172.20.0.4"
5) "port"
6) "6379"
7) "runid"
8) "4d3128c95909fab2969f508247906fe2cbd74837"
9) "flags"
10) "slave"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "11"
19) "last-ping-reply"
20) "11"
21) "down-after-milliseconds"
22) "30000"
23) "info-refresh"
24) "7517"
25) "role-reported"
26) "slave"
27) "role-reported-time"
28) "278528"
29) "master-link-down-time"
30) "0"
31) "master-link-status"
32) "ok"
33) "master-host"
34) "master.test"
35) "master-port"
36) "6379"
37) "slave-priority"
38) "100"
39) "slave-repl-offset"
40) "54607"
127.0.0.1:26379> SENTINEL sentinels master1
1) 1) "name"
2) "3aaba35f1c2307febfc42a8587370d87b633cf4e"
3) "ip"
4) "172.20.0.5"
5) "port"
6) "26379"
7) "runid"
8) "3aaba35f1c2307febfc42a8587370d87b633cf4e"
9) "flags"
10) "sentinel"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "934"
19) "last-ping-reply"
20) "934"
21) "down-after-milliseconds"
22) "30000"
23) "last-hello-message"
24) "532"
25) "voted-leader"
26) "?"
27) "voted-leader-epoch"
28) "0"
2) 1) "name"
2) "bee5b59d166a278cd66dc147a21a25ba8e9e566c"
3) "ip"
4) "172.20.0.6"
5) "port"
6) "26379"
7) "runid"
8) "bee5b59d166a278cd66dc147a21a25ba8e9e566c"
9) "flags"
10) "sentinel"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "316"
19) "last-ping-reply"
20) "316"
21) "down-after-milliseconds"
22) "30000"
23) "last-hello-message"
24) "1353"
25) "voted-leader"
26) "?"
27) "voted-leader-epoch"
28) "0"
2 运行原理
3 详细配置
# 配置被监控master的名字、ip、port,以及判定ODOWN所需的sentinel数量(quorum)
# sentinel monitor <master-name> <ip> <redis-port> <quorum>
sentinel monitor master1 master.test 6379 2
# master密码
# sentinel auth-pass <master-name> <password>
# 判定SDOWN的时间间隔
# sentinel down-after-milliseconds <master-name> <milliseconds>
sentinel down-after-milliseconds master1 30000
# sentinel failover-timeout <master-name> <milliseconds>
sentinel failover-timeout master1 180000
# sentinel parallel-syncs <master-name> <numreplicas>
sentinel parallel-syncs master1 1
protected-mode no
# 端口号
port 26379
# 以daemon方式运行时会写入一个/var/run/redis-sentinel.pid文件
daemonize no
# 自定义pid文件路径
pidfile /var/run/redis-sentinel.pid
# 日志文件地址,空字符串表示输出到标准输出(以daemon方式运行时标准输出会被重定向到/dev/null)
logfile ""
# NAT网络环境中配置的IP和Port
sentinel announce-ip 1.2.3.4
sentinel announce-port 26379
# 允许使用主机名代替IP(resolve-hostnames),以及是否对外通告主机名(announce-hostnames)
SENTINEL resolve-hostnames yes
SENTINEL announce-hostnames no