Timetombs

泛义的工具是文明的基础,而确指的工具却是愚人的器物

66h / 118a
,更新于 2024-11-10T12:35:26Z+08:00 by   330589a

[Redis] sentinel

版权声明 - CC BY-NC-SA 4.0

在上一篇博客Replication1中我们构建了一个简单的主从复制架构的redis服务,利用Replication我们可以让redis提供读写分离、提供读取能力、数据备份等功能,也支持slave从故障中恢复。但是如果master出现了故障,那么整个Replication都会处于一个不可用的状态。这显然无法满足高可用的目标。而这个目标需要借助sentinel2来实现。

sentinel概述:

  1. 监控:sentinel是一个分布式系统,多个实例利用gossip协议协同工作。sentinel监控master和slave实例,同时sentinel实例之间也互相监控。
  2. 通知: 当某一个实例出现问题时,sentinel可以通过API通知系统管理员。
  3. 自动故障转移:如果被监控的master出现问题,sentinel可以启动一个自动故障转移的过程,sentinel实例之间选举出来一个slave提升为master,然后配置其他slave的配置使其成为新的master的slave,并且通知client使用新的连接地址。
  4. 配置提供者:client不再直接连接到master或者slave,而是连接到sentinel,由sentinel提供redis的master和slave的地址。

1 搭建环境

运行sentinel有两种方式:

  1. redis-sentinel /path/to/sentinel.conf;
  2. redis-server /path/to/sentinel.conf --sentinel;

两种方式是完全一样的(通常redis-sentinel文件是redis-server的一个符号链接,redis-server启动时会做如下检查):

/* Decide whether the process was started in Sentinel mode.
 *
 * argc/argv are the program arguments as received by main().
 * Returns 1 when argv[0] contains the substring "redis-sentinel", or when
 * any argument after argv[0] is exactly "--sentinel"; returns 0 otherwise. */
int checkForSentinelMode(int argc, char **argv) {
    int found = 0;
    int idx = 1;

    /* Name-based detection: a substring match on the executable name. */
    if (strstr(argv[0],"redis-sentinel")) return 1;

    /* Flag-based detection: stop scanning at the first exact match. */
    while (idx < argc && !found) {
        found = (strcmp(argv[idx],"--sentinel") == 0);
        idx++;
    }
    return found;
}

与redis-server默认的6379端口号不同的是,redis-sentinel默认运行在26379端口。

这里使用docker-compose -f redis.yml up -d启动一个主从复制的环境,一个master、两个slave和三个sentinel

# https://docs.docker.com/compose/compose-file/compose-file-v3/
# Test topology: one master, two replicas and three sentinels.
version: '3'

services:
  # Master instance; its data directory is kept in a named volume.
  master.test:
    image: redis:6.2
    restart: on-failure
    command: redis-server
    volumes:
      - volume_master:/data

  # First replica of master.test.
  # `--replicaof` is used instead of `--slaveof`, which has been a
  # deprecated alias since Redis 5; redis:6.2 accepts both.
  slave1.test:
    image: redis:6.2
    restart: on-failure
    command: redis-server --replicaof master.test 6379
    depends_on:
      - master.test
    volumes:
      - volume_slave1:/data

  # Second replica of master.test.
  slave2.test:
    image: redis:6.2
    restart: on-failure
    command: redis-server --replicaof master.test 6379
    depends_on:
      - master.test
    volumes:
      - volume_slave2:/data

  # The three sentinels are built from the Dockerfile in the current
  # directory and run redis-server in sentinel mode against
  # /etc/redis/sentinel.conf. They are started after the master and both
  # replicas.
  sentinel1.test:
    build: .
    restart: on-failure
    command: redis-server /etc/redis/sentinel.conf --sentinel
    depends_on:
      - master.test
      - slave1.test
      - slave2.test

  sentinel2.test:
    build: .
    restart: on-failure
    command: redis-server /etc/redis/sentinel.conf --sentinel
    depends_on:
      - master.test
      - slave1.test
      - slave2.test

  sentinel3.test:
    build: .
    restart: on-failure
    command: redis-server /etc/redis/sentinel.conf --sentinel
    depends_on:
      - master.test
      - slave1.test
      - slave2.test

# Named volumes backing /data of the master and the two replicas.
volumes:
  volume_master:
  volume_slave1:
  volume_slave2:

# https://hub.docker.com/_/redis/
# https://github.com/docker-library/redis/blob/master/6.2/Dockerfile
# Sentinel image: the stock redis:6.2 image plus a sentinel configuration file.
FROM redis:6.2

# Install the configuration at /etc/redis/sentinel.conf, owned by redis:redis.
# NOTE(review): the ownership is presumably set so that the sentinel process
# can rewrite its own configuration file at runtime — confirm.
COPY --chown=redis:redis sentinel.conf /etc/redis/sentinel.conf

# Port the sentinel listens on (26379).
EXPOSE 26379
# https://github.com/redis/redis/blob/6.2/sentinel.conf

# --- Process and network settings ---
port 26379
protected-mode no
daemonize no

# --- Hostname support ---
# Declared ahead of the monitor line because the master is given by
# hostname (master.test) rather than by IP address.
SENTINEL resolve-hostnames yes
SENTINEL announce-hostnames no

# --- Monitored master: name master1, address master.test:6379, quorum 2 ---
sentinel monitor master1 master.test 6379 2
sentinel down-after-milliseconds master1 30000
sentinel failover-timeout master1 180000
sentinel parallel-syncs master1 1

启动后进入到其中的一个sentinel中(docker exec -it sentinel_sentinel1.test_1 redis-cli -p 26379):

# 查看启动后的redis主从复制+Sentinel的容器
docker-compose -f redis.yml ps

          Name                         Command               State          Ports
----------------------------------------------------------------------------------------
sentinel_master.test_1      docker-entrypoint.sh redis ...   Up      6379/tcp
sentinel_slave1.test_1      docker-entrypoint.sh redis ...   Up      6379/tcp
sentinel_slave2.test_1      docker-entrypoint.sh redis ...   Up      6379/tcp
sentinel_sentinel1.test_1   docker-entrypoint.sh redis ...   Up      26379/tcp, 6379/tcp
sentinel_sentinel2.test_1   docker-entrypoint.sh redis ...   Up      26379/tcp, 6379/tcp
sentinel_sentinel3.test_1   docker-entrypoint.sh redis ...   Up      26379/tcp, 6379/tcp

# 进入其中一个sentinel
docker exec -it sentinel_sentinel1.test_1 redis-cli -p 26379

127.0.0.1:26379> SENTINEL master master1
 1) "name"
 2) "master1"
 3) "ip"
 4) "172.20.0.2"
 5) "port"
 6) "6379"
 7) "runid"
 8) "f12d1ec60110762a0c8d04e1a41e5a35475cd180"
 9) "flags"
10) "master"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "954"
19) "last-ping-reply"
20) "954"
21) "down-after-milliseconds"
22) "30000"
23) "info-refresh"
24) "1008"
25) "role-reported"
26) "master"
27) "role-reported-time"
28) "231902"
29) "config-epoch"
30) "0"
31) "num-slaves"
32) "2"
33) "num-other-sentinels"
34) "2"
35) "quorum"
36) "2"
37) "failover-timeout"
38) "180000"
39) "parallel-syncs"
40) "1"

127.0.0.1:26379> SENTINEL replicas master1
1)  1) "name"
    2) "172.20.0.3:6379"
    3) "ip"
    4) "172.20.0.3"
    5) "port"
    6) "6379"
    7) "runid"
    8) "1af97ddae7eccd5fca707b202188498d990d8cb8"
    9) "flags"
   10) "slave"
   11) "link-pending-commands"
   12) "0"
   13) "link-refcount"
   14) "1"
   15) "last-ping-sent"
   16) "0"
   17) "last-ok-ping-reply"
   18) "11"
   19) "last-ping-reply"
   20) "11"
   21) "down-after-milliseconds"
   22) "30000"
   23) "info-refresh"
   24) "7516"
   25) "role-reported"
   26) "slave"
   27) "role-reported-time"
   28) "278534"
   29) "master-link-down-time"
   30) "0"
   31) "master-link-status"
   32) "ok"
   33) "master-host"
   34) "master.test"
   35) "master-port"
   36) "6379"
   37) "slave-priority"
   38) "100"
   39) "slave-repl-offset"
   40) "54607"
2)  1) "name"
    2) "172.20.0.4:6379"
    3) "ip"
    4) "172.20.0.4"
    5) "port"
    6) "6379"
    7) "runid"
    8) "4d3128c95909fab2969f508247906fe2cbd74837"
    9) "flags"
   10) "slave"
   11) "link-pending-commands"
   12) "0"
   13) "link-refcount"
   14) "1"
   15) "last-ping-sent"
   16) "0"
   17) "last-ok-ping-reply"
   18) "11"
   19) "last-ping-reply"
   20) "11"
   21) "down-after-milliseconds"
   22) "30000"
   23) "info-refresh"
   24) "7517"
   25) "role-reported"
   26) "slave"
   27) "role-reported-time"
   28) "278528"
   29) "master-link-down-time"
   30) "0"
   31) "master-link-status"
   32) "ok"
   33) "master-host"
   34) "master.test"
   35) "master-port"
   36) "6379"
   37) "slave-priority"
   38) "100"
   39) "slave-repl-offset"
   40) "54607"

127.0.0.1:26379> SENTINEL sentinels master1
1)  1) "name"
    2) "3aaba35f1c2307febfc42a8587370d87b633cf4e"
    3) "ip"
    4) "172.20.0.5"
    5) "port"
    6) "26379"
    7) "runid"
    8) "3aaba35f1c2307febfc42a8587370d87b633cf4e"
    9) "flags"
   10) "sentinel"
   11) "link-pending-commands"
   12) "0"
   13) "link-refcount"
   14) "1"
   15) "last-ping-sent"
   16) "0"
   17) "last-ok-ping-reply"
   18) "934"
   19) "last-ping-reply"
   20) "934"
   21) "down-after-milliseconds"
   22) "30000"
   23) "last-hello-message"
   24) "532"
   25) "voted-leader"
   26) "?"
   27) "voted-leader-epoch"
   28) "0"
2)  1) "name"
    2) "bee5b59d166a278cd66dc147a21a25ba8e9e566c"
    3) "ip"
    4) "172.20.0.6"
    5) "port"
    6) "26379"
    7) "runid"
    8) "bee5b59d166a278cd66dc147a21a25ba8e9e566c"
    9) "flags"
   10) "sentinel"
   11) "link-pending-commands"
   12) "0"
   13) "link-refcount"
   14) "1"
   15) "last-ping-sent"
   16) "0"
   17) "last-ok-ping-reply"
   18) "316"
   19) "last-ping-reply"
   20) "316"
   21) "down-after-milliseconds"
   22) "30000"
   23) "last-hello-message"
   24) "1353"
   25) "voted-leader"
   26) "?"
   27) "voted-leader-epoch"
   28) "0"

2 运行原理

3 详细配置

# Monitor a master: give it a name and set its address, port and quorum
# (the number of sentinels that must agree before the master is marked ODOWN).
# sentinel monitor <master-name> <ip> <redis-port> <quorum>
sentinel monitor master1 master.test 6379 2
# Password of the master (not set in this example).
# sentinel auth-pass <master-name> <password>

# Time in milliseconds after which an unreachable instance is marked SDOWN.
# sentinel down-after-milliseconds <master-name> <milliseconds>
sentinel down-after-milliseconds master1 30000
# Failover timeout in milliseconds.
# sentinel failover-timeout <master-name> <milliseconds>
sentinel failover-timeout master1 180000
# Number of replicas that may be reconfigured to sync with the new master
# at the same time after a failover.
# sentinel parallel-syncs <master-name> <numreplicas>
sentinel parallel-syncs master1 1

# NOTE(review): protected mode is switched off, presumably so that the other
# containers can reach this sentinel without authentication — confirm.
protected-mode no

# Listening port.
port 26379

# When run as a daemon, a /var/run/redis-sentinel.pid file is written.
daemonize no
# Custom pid file path.
pidfile /var/run/redis-sentinel.pid

# Log file path. An empty string sends the log to standard output; if the
# process is daemonized as well, that output goes to /dev/null.
logfile ""

# IP and port to announce when running behind NAT.
# 1.2.3.4 is a placeholder address; replace it with the externally reachable one.
sentinel announce-ip 1.2.3.4
sentinel announce-port 26379

# Hostname support: accept hostnames in place of IP addresses
# (resolve-hostnames) and choose whether hostnames are announced
# (announce-hostnames).
SENTINEL resolve-hostnames yes
SENTINEL announce-hostnames no

4 参考

上一篇 : [Redis] replication