Skip to content

Commit 7191212

Browse files
Splunkenterprisereceiver add health metric (#36695) (#36695)
<!--Ex. Fixing a bug - Describe the bug and how this fixes the issue. Ex. Adding a feature - Explain what this achieves.--> #### Description Adds `splunk.health` metric that uses the introspection endpoint at `/services/server/health/splunkd/details`. The metric features 2 attributes: `splunk.feature`: Describes the part of the server's feature `splunk.feature.health`: Describes (in red, yellow, green) the health status of the given feature The metric will remain a value of 1 unless the feature's health becomes 'red.' <!--Describe what testing was performed and which tests were added.--> #### Testing Built a custom version of splunk-otel-collector that referenced this code. Then ran it on a Splunk stack, and received metrics out. Also created generated tests. <!--Describe the documentation added.--> #### Documentation Added an entry to `documentation.md` describing the new metric. --------- Co-authored-by: Sean Marciniak <[email protected]>
1 parent ad1be0b commit 7191212

File tree

10 files changed

+212
-2
lines changed

10 files changed

+212
-2
lines changed

.chloggen/changes.yaml

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: 'enhancement'
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: 'splunkenterprisereceiver'
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: "Added a new `splunk.health` metric."
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [36695]
14+
15+
# If your change doesn't affect end users or the exported elements of any package,
16+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
17+
# Optional: The change log or logs in which this entry should be included.
18+
# e.g. '[user]' or '[user, api]'
19+
# Include 'user' if the change is relevant to end users.
20+
# Include 'api' if there is a change to a library API.
21+
# Default: '[user]'
22+
change_logs: [user]

receiver/splunkenterprisereceiver/documentation.md

+15
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,21 @@ Size in bytes on disk of this index *Note:** Must be pointed at specific indexer
390390
| ---- | ----------- | ------ |
391391
| splunk.index.name | The name of the index reporting a specific KPI | Any Str |
392392

393+
### splunk.health
394+
395+
The status ('red', 'yellow', or 'green') of the Splunk server. Health of 'red' produces a 0 while all other colors produce a 1.
396+
397+
| Unit | Metric Type | Value Type |
398+
| ---- | ----------- | ---------- |
399+
| {status} | Gauge | Int |
400+
401+
#### Attributes
402+
403+
| Name | Description | Values |
404+
| ---- | ----------- | ------ |
405+
| splunk.feature | The Feature name from the Splunk Health Introspection Endpoint | Any Str |
406+
| splunk.feature.health | The Health (in color form) of a Splunk Feature from the Splunk Health Introspection Endpoint | Any Str |
407+
393408
### splunk.indexer.throughput
394409

395410
Gauge tracking average bytes per second throughput of indexer. **Note:** Must be pointed at specific indexer `endpoint` and gathers metrics from only that indexer.

receiver/splunkenterprisereceiver/internal/metadata/generated_config.go

+4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

receiver/splunkenterprisereceiver/internal/metadata/generated_config_test.go

+2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

receiver/splunkenterprisereceiver/internal/metadata/generated_metrics.go

+60
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

receiver/splunkenterprisereceiver/internal/metadata/generated_metrics_test.go

+21
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

receiver/splunkenterprisereceiver/internal/metadata/testdata/config.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ all_set:
1919
enabled: true
2020
splunk.data.indexes.extended.total.size:
2121
enabled: true
22+
splunk.health:
23+
enabled: true
2224
splunk.indexer.avg.rate:
2325
enabled: true
2426
splunk.indexer.cpu.time:
@@ -101,6 +103,8 @@ none_set:
101103
enabled: false
102104
splunk.data.indexes.extended.total.size:
103105
enabled: false
106+
splunk.health:
107+
enabled: false
104108
splunk.indexer.avg.rate:
105109
enabled: false
106110
splunk.indexer.cpu.time:

receiver/splunkenterprisereceiver/metadata.yaml

+14
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,12 @@ attributes:
3939
splunk.searchartifacts.cache.type:
4040
description: The search artifacts cache type
4141
type: string
42+
splunk.feature:
43+
description: The Feature name from the Splunk Health Introspection Endpoint
44+
type: string
45+
splunk.feature.health:
46+
description: The Health (in color form) of a Splunk Feature from the Splunk Health Introspection Endpoint
47+
type: string
4248

4349
metrics:
4450
splunk.license.index.usage:
@@ -345,6 +351,14 @@ metrics:
345351
aggregation_temporality: cumulative
346352
value_type: int
347353
attributes: [splunk.host]
354+
#`services/server/health/splunkd/details`
355+
splunk.health:
356+
enabled: false
357+
description: The status ('red', 'yellow', or 'green') of the Splunk server. Health of 'red' produces a 0 while all other colors produce a 1.
358+
unit: "{status}"
359+
gauge:
360+
value_type: int
361+
attributes: [splunk.feature, splunk.feature.health]
348362

349363
tests:
350364
config:

receiver/splunkenterprisereceiver/scraper.go

+55-2
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ func (s *splunkScraper) scrape(ctx context.Context) (pmetric.Metrics, error) {
101101
s.scrapeIndexerAvgRate,
102102
s.scrapeKVStoreStatus,
103103
s.scrapeSearchArtifacts,
104+
s.scrapeHealth,
104105
}
105106
errChan := make(chan error, len(metricScrapes))
106107

@@ -1075,12 +1076,12 @@ func unmarshallSearchReq(res *http.Response, sr *searchResponse) error {
10751076

10761077
body, err := io.ReadAll(res.Body)
10771078
if err != nil {
1078-
return fmt.Errorf("Failed to read response: %w", err)
1079+
return fmt.Errorf("failed to read response: %w", err)
10791080
}
10801081

10811082
err = xml.Unmarshal(body, &sr)
10821083
if err != nil {
1083-
return fmt.Errorf("Failed to unmarshall response: %w", err)
1084+
return fmt.Errorf("failed to unmarshall response: %w", err)
10841085
}
10851086

10861087
return nil
@@ -1733,3 +1734,55 @@ func (s *splunkScraper) scrapeSearchArtifacts(ctx context.Context, now pcommon.T
17331734
}
17341735
}
17351736
}
1737+
1738+
// Scrape Health Introspection Endpoint
1739+
func (s *splunkScraper) scrapeHealth(ctx context.Context, now pcommon.Timestamp, errs chan error) {
1740+
if !s.conf.MetricsBuilderConfig.Metrics.SplunkHealth.Enabled {
1741+
return
1742+
}
1743+
1744+
ctx = context.WithValue(ctx, endpointType("type"), typeCm)
1745+
1746+
ept := apiDict[`SplunkHealth`]
1747+
var ha healthArtifacts
1748+
1749+
req, err := s.splunkClient.createAPIRequest(ctx, ept)
1750+
if err != nil {
1751+
errs <- err
1752+
return
1753+
}
1754+
1755+
res, err := s.splunkClient.makeRequest(req)
1756+
if err != nil {
1757+
errs <- err
1758+
return
1759+
}
1760+
defer res.Body.Close()
1761+
1762+
if err := json.NewDecoder(res.Body).Decode(&ha); err != nil {
1763+
errs <- err
1764+
return
1765+
}
1766+
1767+
s.settings.Logger.Debug(fmt.Sprintf("Features: %s", ha.Entries))
1768+
for _, details := range ha.Entries {
1769+
s.traverseHealthDetailFeatures(details.Content, now)
1770+
}
1771+
}
1772+
1773+
func (s *splunkScraper) traverseHealthDetailFeatures(details healthDetails, now pcommon.Timestamp) {
1774+
if details.Features == nil {
1775+
return
1776+
}
1777+
1778+
for k, feature := range details.Features {
1779+
if feature.Health != "red" {
1780+
s.settings.Logger.Debug(feature.Health)
1781+
s.mb.RecordSplunkHealthDataPoint(now, 1, k, feature.Health)
1782+
} else {
1783+
s.settings.Logger.Debug(feature.Health)
1784+
s.mb.RecordSplunkHealthDataPoint(now, 0, k, feature.Health)
1785+
}
1786+
s.traverseHealthDetailFeatures(feature, now)
1787+
}
1788+
}

receiver/splunkenterprisereceiver/search_result.go

+15
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ var apiDict = map[string]string{
2525
`SplunkIntrospectionQueues`: `/services/server/introspection/queues?output_mode=json&count=-1`,
2626
`SplunkKVStoreStatus`: `/services/kvstore/status?output_mode=json`,
2727
`SplunkDispatchArtifacts`: `/services/server/status/dispatch-artifacts?output_mode=json&count=-1`,
28+
`SplunkHealth`: `/services/server/health/splunkd/details?output_mode=json`,
2829
}
2930

3031
type searchResponse struct {
@@ -156,3 +157,17 @@ type DispatchArtifactContent struct {
156157
StatusCacheSize string `json:"cached_job_status_status_csv_size_mb"`
157158
CacheTotalEntries string `json:"cached_job_status_total_entries"`
158159
}
160+
161+
// JSON response shapes for the `/services/server/health/splunkd/details` endpoint.
type (
	// healthArtifacts is the top-level response document; the JSON "entry"
	// array is decoded into Entries.
	healthArtifacts struct {
		Entries []healthArtifactEntry `json:"entry"`
	}

	// healthArtifactEntry is one element of the "entry" array; its "content"
	// object holds the root of a health tree.
	healthArtifactEntry struct {
		Content healthDetails `json:"content"`
	}

	// healthDetails is one node of the health tree. The type is recursive:
	// Features maps a feature name to that feature's own healthDetails node.
	healthDetails struct {
		// Health is the node's "health" string as reported by the endpoint.
		Health string `json:"health"`
		// Features holds the child nodes keyed by name; nil when the JSON
		// object has no "features" member.
		Features map[string]healthDetails `json:"features,omitempty"`
	}
)

0 commit comments

Comments
 (0)