Skip to content

Commit 53eb193

Browse files
Add discovery endpoint for Redis cluster (#989)
* Add discovery endpoint for Redis cluster --------- Co-authored-by: Oliver <[email protected]>
1 parent e062c5e commit 53eb193

File tree

6 files changed

+269
-1
lines changed

6 files changed

+269
-1
lines changed

README.md

+28-1
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,33 @@ The `targets-redis-instances.json` should look something like this:
135135

136136
Prometheus uses file watches and all changes to the json file are applied immediately.
137137

138+
### Prometheus Configuration to Scrape All Nodes in a Redis Cluster
139+
140+
When using a Redis Cluster, the exporter provides a discovery endpoint that can be used to discover all nodes in the cluster.
141+
To use this feature, the exporter must be started with the `--is-cluster` flag.\
142+
The discovery endpoint is available at `/discover-cluster-nodes` and can be used in the Prometheus configuration like this:
143+
144+
```yaml
scrape_configs:
  - job_name: 'redis_exporter_cluster_nodes'
    http_sd_configs:
      - url: http://<<REDIS-EXPORTER-HOSTNAME>>:9121/discover-cluster-nodes
        refresh_interval: 10m
    metrics_path: /scrape
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: <<REDIS-EXPORTER-HOSTNAME>>:9121

  ## config for scraping the exporter itself
  - job_name: 'redis_exporter'
    static_configs:
      - targets:
        - <<REDIS-EXPORTER-HOSTNAME>>:9121
```
138165

139166
### Command line flags
140167

@@ -207,7 +234,7 @@ Alternatively, you can provide the username and/or password using the `--redis.u
207234
If you want to use a dedicated Redis user for the redis_exporter (instead of the default user) then you need to enable a list of commands for that user.
208235
You can use the following Redis command to set up the user, just replace `<<<USERNAME>>>` and `<<<PASSWORD>>>` with your desired values.
209236
```
210-
ACL SETUSER <<<USERNAME>>> -@all +@connection +memory -readonly +strlen +config|get +xinfo +pfcount -quit +zcard +type +xlen -readwrite -command +client -wait +scard +llen +hlen +get +eval +slowlog +cluster|info -hello -echo +info +latency +scan -reset -auth -asking ><<<PASSWORD>>>
237+
ACL SETUSER <<<USERNAME>>> -@all +@connection +memory -readonly +strlen +config|get +xinfo +pfcount -quit +zcard +type +xlen -readwrite -command +client -wait +scard +llen +hlen +get +eval +slowlog +cluster|info +cluster|slots +cluster|nodes -hello -echo +info +latency +scan -reset -auth -asking ><<<PASSWORD>>>
211238
```
212239
213240
For monitoring a Sentinel-node you may use the following command with the right ACL:

exporter/exporter.go

+1
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,7 @@ func NewRedisExporter(uri string, opts Options) (*Exporter, error) {
509509

510510
e.mux.HandleFunc("/", e.indexHandler)
511511
e.mux.HandleFunc("/scrape", e.scrapeHandler)
512+
e.mux.HandleFunc("/discover-cluster-nodes", e.discoverClusterNodesHandler)
512513
e.mux.HandleFunc("/health", e.healthHandler)
513514
e.mux.HandleFunc("/-/reload", e.reloadPwdFile)
514515

exporter/http.go

+49
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package exporter
22

33
import (
44
"crypto/subtle"
5+
"encoding/json"
56
"errors"
67
"fmt"
78
"net/http"
@@ -103,6 +104,54 @@ func (e *Exporter) scrapeHandler(w http.ResponseWriter, r *http.Request) {
103104
).ServeHTTP(w, r)
104105
}
105106

107+
func (e *Exporter) discoverClusterNodesHandler(w http.ResponseWriter, r *http.Request) {
108+
if !e.options.IsCluster {
109+
http.Error(w, "The discovery endpoint is only available on a redis cluster", http.StatusBadRequest)
110+
return
111+
}
112+
113+
c, err := e.connectToRedisCluster()
114+
if err != nil {
115+
http.Error(w, "Couldn't connect to redis cluster", http.StatusInternalServerError)
116+
return
117+
}
118+
defer c.Close()
119+
120+
nodes, err := e.getClusterNodes(c)
121+
if err != nil {
122+
http.Error(w, fmt.Sprintf("Failed to fetch cluster nodes: %s", err), http.StatusInternalServerError)
123+
return
124+
}
125+
126+
discovery := []struct {
127+
Targets []string `json:"targets"`
128+
Labels map[string]string `json:"labels"`
129+
}{
130+
{
131+
Targets: make([]string, len(nodes)),
132+
Labels: make(map[string]string, 0),
133+
},
134+
}
135+
136+
isTls := strings.HasPrefix(e.redisAddr, "rediss://")
137+
for i, node := range nodes {
138+
if isTls {
139+
discovery[0].Targets[i] = "rediss://" + node
140+
} else {
141+
discovery[0].Targets[i] = "redis://" + node
142+
}
143+
}
144+
145+
data, err := json.MarshalIndent(discovery, "", " ")
146+
if err != nil {
147+
http.Error(w, fmt.Sprintf("Failed to marshal discovery data: %s", err), http.StatusInternalServerError)
148+
return
149+
}
150+
151+
w.Header().Set("Content-Type", "application/json")
152+
_, _ = w.Write(data)
153+
}
154+
106155
func (e *Exporter) reloadPwdFile(w http.ResponseWriter, r *http.Request) {
107156
if e.options.RedisPwdFile == "" {
108157
http.Error(w, "There is no pwd file specified", http.StatusBadRequest)

exporter/http_test.go

+50
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,56 @@ func TestHttpHandlers(t *testing.T) {
321321
}
322322
}
323323

324+
func TestHttpDiscoverClusterNodesHandlers(t *testing.T) {
325+
if os.Getenv("TEST_REDIS_CLUSTER_MASTER_URI") == "" {
326+
t.Skipf("TEST_REDIS_CLUSTER_MASTER_URI not set - skipping")
327+
}
328+
329+
tests := []struct {
330+
path string
331+
want string
332+
isCluster bool
333+
}{
334+
{
335+
path: "/discover-cluster-nodes",
336+
want: "redis://127.0.0.1:7000",
337+
isCluster: true,
338+
},
339+
{
340+
path: "/discover-cluster-nodes",
341+
want: "redis://127.0.0.1:7001",
342+
isCluster: true,
343+
},
344+
{
345+
path: "/discover-cluster-nodes",
346+
want: "redis://127.0.0.1:7002",
347+
isCluster: true,
348+
},
349+
{
350+
path: "/discover-cluster-nodes",
351+
want: "The discovery endpoint is only available on a redis cluster",
352+
isCluster: false,
353+
},
354+
}
355+
356+
for _, tst := range tests {
357+
t.Run(fmt.Sprintf("path: %s, isCluster: %v", tst.path, tst.isCluster), func(t *testing.T) {
358+
e, _ := NewRedisExporter(os.Getenv("TEST_REDIS_CLUSTER_MASTER_URI"), Options{
359+
Namespace: "test",
360+
Registry: prometheus.NewRegistry(),
361+
IsCluster: tst.isCluster,
362+
})
363+
ts := httptest.NewServer(e)
364+
defer ts.Close()
365+
366+
body := downloadURL(t, ts.URL+tst.path)
367+
if !strings.Contains(body, tst.want) {
368+
t.Fatalf(`error, expected string "%s" in body, got body: \n\n%s`, tst.want, body)
369+
}
370+
})
371+
}
372+
}
373+
324374
func TestReloadHandlers(t *testing.T) {
325375
if os.Getenv("TEST_PWD_REDIS_URI") == "" {
326376
t.Skipf("TEST_PWD_REDIS_URI not set - skipping")

exporter/nodes.go

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package exporter
2+
3+
import (
4+
"regexp"
5+
"strings"
6+
7+
"github.com/gomodule/redigo/redis"
8+
log "github.com/sirupsen/logrus"
9+
)
10+
11+
// reNodeAddress matches the address field of a CLUSTER NODES line:
// <ip>:<port>@<cport>, optionally followed by ,<hostname>. The pattern is
// anchored at the start only and exposes the named groups ip, port, cport and
// hostname. The ip group is greedy, so it extends up to the last colon that is
// followed by digits and an '@'.
var reNodeAddress = regexp.MustCompile(`^(?P<ip>.+):(?P<port>\d+)@(?P<cport>\d+)(?:,(?P<hostname>.+))?`)
12+
13+
func (e *Exporter) getClusterNodes(c redis.Conn) ([]string, error) {
14+
output, err := redis.String(doRedisCmd(c, "CLUSTER", "NODES"))
15+
if err != nil {
16+
log.Errorf("Error getting cluster nodes: %s", err)
17+
return nil, err
18+
}
19+
20+
lines := strings.Split(output, "\n")
21+
nodes := []string{}
22+
23+
for _, line := range lines {
24+
if node, ok := parseClusterNodeString(line); ok {
25+
nodes = append(nodes, node)
26+
}
27+
}
28+
29+
return nodes, nil
30+
}
31+
32+
/*
33+
<id> <ip:port@cport[,hostname]> <flags> <master> <ping-sent> <pong-recv> <config-epoch> <link-state> <slot> <slot> ... <slot>
34+
eaf69c70d876558a948ba62af0884a37d42c9627 127.0.0.1:7002@17002 master - 0 1742836359057 3 connected 10923-16383
35+
*/
36+
func parseClusterNodeString(node string) (string, bool) {
37+
log.Debugf("parseClusterNodeString node: [%s]", node)
38+
39+
fields := strings.Fields(node)
40+
if len(fields) < 2 {
41+
log.Debugf("Invalid field count for node: %s", node)
42+
return "", false
43+
}
44+
45+
address := reNodeAddress.FindStringSubmatch(fields[1])
46+
if len(address) < 3 {
47+
log.Debugf("Invalid format for node address, got: %s", fields[1])
48+
return "", false
49+
}
50+
51+
return address[1] + ":" + address[2], true
52+
}

exporter/nodes_test.go

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
package exporter
2+
3+
import (
4+
"os"
5+
"slices"
6+
"testing"
7+
)
8+
9+
func TestNodesGetClusterNodes(t *testing.T) {
10+
if os.Getenv("TEST_REDIS_CLUSTER_MASTER_URI") == "" {
11+
t.Skipf("TEST_REDIS_CLUSTER_MASTER_URI not set - skipping")
12+
}
13+
14+
host := os.Getenv("TEST_REDIS_CLUSTER_MASTER_URI")
15+
e, _ := NewRedisExporter(host, Options{})
16+
c, err := e.connectToRedisCluster()
17+
if err != nil {
18+
t.Fatalf("connectToRedisCluster() err: %s", err)
19+
}
20+
defer c.Close()
21+
22+
nodes, err := e.getClusterNodes(c)
23+
if err != nil {
24+
t.Fatalf("getClusterNodes() err: %s", err)
25+
}
26+
27+
tsts := []struct {
28+
node string
29+
ok bool
30+
}{
31+
{node: "127.0.0.1:7003", ok: true},
32+
{node: "127.0.0.1:7002", ok: true},
33+
{node: "127.0.0.1:7005", ok: true},
34+
{node: "127.0.0.1:7001", ok: true},
35+
{node: "127.0.0.1:7004", ok: true},
36+
{node: "127.0.0.1:7000", ok: true},
37+
38+
{node: "", ok: false},
39+
{node: " ", ok: false},
40+
{node: "127.0.0.1", ok: false},
41+
{node: "127.0.0.1:8000", ok: false},
42+
}
43+
44+
for _, tst := range tsts {
45+
t.Run(tst.node, func(t *testing.T) {
46+
found := slices.Contains(nodes, tst.node)
47+
if found != tst.ok {
48+
t.Errorf("Test failed for node: %s expected: %t, got: %t", tst.node, tst.ok, found)
49+
}
50+
})
51+
}
52+
}
53+
54+
func TestParseClusterNodeString(t *testing.T) {
55+
tsts := []struct {
56+
line string
57+
node string
58+
ok bool
59+
}{
60+
// The following are examples of the output of the CLUSTER NODES command.
61+
// https://redis.io/docs/latest/commands/cluster-nodes/
62+
{line: "07c37dfeb235213a872192d90877d0cd55635b91 127.0.0.1:30004@31004,hostname4 slave e7d1eecce10fd6bb5eb35b9f99a514335d9ba9ca 0 1426238317239 4 connected", node: "127.0.0.1:30004", ok: true},
63+
{line: "67ed2db8d677e59ec4a4cefb06858cf2a1a89fa1 127.0.0.1:30002@31002,hostname2 master - 0 1426238316232 2 connected 5461-10922", node: "127.0.0.1:30002", ok: true},
64+
{line: "292f8b365bb7edb5e285caf0b7e6ddc7265d2f4f 127.0.0.1:30003@31003,hostname3 master - 0 1426238318243 3 connected 10923-16383", node: "127.0.0.1:30003", ok: true},
65+
{line: "6ec23923021cf3ffec47632106199cb7f496ce01 127.0.0.1:30005@31005,hostname5 slave 67ed2db8d677e59ec4a4cefb06858cf2a1a89fa1 0 1426238316232 5 connected", node: "127.0.0.1:30005", ok: true},
66+
{line: "824fe116063bc5fcf9f4ffd895bc17aee7731ac3 127.0.0.1:30006@31006,hostname6 slave 292f8b365bb7edb5e285caf0b7e6ddc7265d2f4f 0 1426238317741 6 connected", node: "127.0.0.1:30006", ok: true},
67+
{line: "e7d1eecce10fd6bb5eb35b9f99a514335d9ba9ca 127.0.0.1:30001@31001,hostname1 myself,master - 0 0 1 connected 0-5460", node: "127.0.0.1:30001", ok: true},
68+
{line: "e7d1eecce10fd6bb5eb35b9f99a514335d9ba9ca 127.0.0.1:30001@31001 myself,master - 0 0 1 connected 0-5460", node: "127.0.0.1:30001", ok: true},
69+
70+
{line: "07c37dfeb235213a872192d90877d0cd55635b91", ok: false},
71+
{line: "07c37dfeb235213a872192d90877d0cd55635b91 127.0.0.1:30004,hostname4 slave", ok: false},
72+
{line: "127.0.0.1:30005,hostname5", ok: false},
73+
}
74+
75+
for _, tst := range tsts {
76+
t.Run(tst.line, func(t *testing.T) {
77+
node, ok := parseClusterNodeString(tst.line)
78+
79+
if ok != tst.ok {
80+
t.Errorf("Test failed for line: %s", tst.line)
81+
return
82+
}
83+
if node != tst.node {
84+
t.Errorf("Node not matching, expected: %s, got: %s", tst.node, node)
85+
return
86+
}
87+
})
88+
}
89+
}

0 commit comments

Comments
 (0)