Skip to content

Commit 76c026c

Browse files
atoulme and jinja2
authored
istio k8s discovery (#5854)
* Spike: test istio k8s discovery * add more metrics * Spike: test istio k8s discovery * add more metrics * update discover rules and config * add discovery test for istio * changelog * fix test * fix lint * add doc * fix test * multiline the config and check more attrs * add metrics to keep * update metric for envoy to a more common one * disable prometheus/istio receiver * update changelog --------- Co-authored-by: Jina Jain <[email protected]>
1 parent 45dff65 commit 76c026c

26 files changed

+899
-20
lines changed
+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
kind: Cluster
2+
apiVersion: kind.x-k8s.io/v1alpha4
3+
nodes:
4+
- role: control-plane
5+
kubeadmConfigPatches:
6+
- |
7+
kind: InitConfiguration
8+
nodeRegistration:
9+
kubeletExtraArgs:
10+
node-labels: "ingress-ready=true"
11+
- |
12+
kind: KubeletConfiguration
13+
serverTLSBootstrap: true
14+
extraPortMappings:
15+
- containerPort: 80
16+
hostPort: 80
17+
protocol: TCP
18+
- containerPort: 443
19+
hostPort: 443
20+
protocol: TCP

.github/workflows/integration-test.yml

+11-4
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ jobs:
399399
id: get-matrix-k8s
400400
run: |
401401
includes=""
402-
for service in "envoy"; do
402+
for service in "envoy" "istio"; do
403403
for arch in "amd64"; do
404404
includes="${includes},{\"SERVICE\": \"${service}\", \"ARCH\": \"${arch}\"}"
405405
done
@@ -425,9 +425,13 @@ jobs:
425425
node_image: kindest/node:v1.30.0
426426
kubectl_version: v1.30.0
427427
cluster_name: kind
428+
config: ./.github/workflows/configs/kind-config.yaml
428429
- name: Deploy service under test
430+
if: ${{ matrix.SERVICE != 'istio' }}
429431
run: |
430-
kubectl apply -f k8s/${{ matrix.SERVICE }}/*.yaml
432+
for f in k8s/${{ matrix.SERVICE }}/*.sh; do
433+
bash "$f"
434+
done
431435
- uses: actions/setup-go@v5
432436
with:
433437
go-version: ${{ env.GO_VERSION }}
@@ -436,6 +440,9 @@ jobs:
436440
with:
437441
name: docker-otelcol-${{ matrix.ARCH }}
438442
path: ./docker-otelcol/${{ matrix.ARCH }}
443+
- name: Fix kubelet TLS server certificates
444+
run: |
445+
kubectl get csr -o=jsonpath='{range.items[?(@.spec.signerName=="kubernetes.io/kubelet-serving")]}{.metadata.name}{" "}{end}' | xargs kubectl certificate approve
439446
- run: docker load -i ./docker-otelcol/${{ matrix.ARCH }}/image.tar
440447
- name: Load Docker image in kind
441448
run: |
@@ -445,5 +452,5 @@ jobs:
445452
- name: Print logs
446453
if: failure()
447454
run: |
448-
kubectl get pods
449-
kubectl logs $(kubectl get pod -l app=otelcol -o jsonpath="{.items[0].metadata.name}")
455+
kubectl get pods -A
456+
kubectl get pod -A -l app=otelcol -o jsonpath="{range .items[*]}{.metadata.namespace} {.metadata.name}{'\n'}{end}" | xargs -r -n2 sh -c 'kubectl logs -n $0 $1'

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,6 @@ tests/installation/testdata/systemd/splunk-otel-collector.conf
5959

6060
# For convenience excluding sarif files generated by ./.github/workflows/scripts/govulncheck-run.sh
6161
/govulncheck/
62+
63+
# temp istio installation files
64+
tests/receivers/istio/istio-*/

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ releases where appropriate.
2727

2828
### 💡 Enhancements 💡
2929

30+
- (Splunk) Add a new discovery bundle for Istio metrics which includes proxy, gateway, and pilot/istiod ([#5854](https://github.com/signalfx/splunk-otel-collector/pull/5854))
31+
- This discovery receiver, named prometheus/istio, is disabled by default. Users can enable it by setting the discovery config `splunk.discovery.receivers.prometheus/istio.enabled=true`.
3032
- (Splunk) Update `splunk-otel-javaagent` to `v2.14.0` ([#6000](https://github.com/signalfx/splunk-otel-collector/pull/6000))
3133
- (Splunk) Update `jmx-metric-gatherer` to `v1.45.0` ([#5995](https://github.com/signalfx/splunk-otel-collector/pull/5995))
3234
- (Splunk) Use direct connection for MongoDB discovery ([#6042](https://github.com/signalfx/splunk-otel-collector/pull/6042))

Makefile

+4
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ smartagent-integration-test:
130130
integration-test-envoy-discovery-k8s:
131131
@set -e; cd tests && $(GOTEST_SERIAL) $(BUILD_INFO_TESTS) --tags=discovery_integration_envoy_k8s -v -timeout 5m -count 1 ./...
132132

133+
.PHONY: integration-test-istio-discovery-k8s
134+
integration-test-istio-discovery-k8s:
135+
@set -e; cd tests && $(GOTEST_SERIAL) $(BUILD_INFO_TESTS) --tags=discovery_integration_istio_k8s -v -timeout 15m -count 1 ./...
136+
133137
.PHONY: gotest-with-codecov
134138
gotest-with-cover:
135139
@$(MAKE) for-all-target TARGET="test-with-codecov"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
#####################################################################################
2+
# This file is generated by the Splunk Distribution of the OpenTelemetry Collector. #
3+
# #
4+
# It reflects the default configuration bundled in the Collector executable for use #
5+
# in discovery mode (--discovery) and is provided for reference or customization. #
6+
# Please note that any changes made to this file will need to be reconciled during #
7+
# upgrades of the Collector. #
8+
#####################################################################################
9+
# prometheus/istio:
10+
# enabled: false
11+
# rule:
12+
# k8s_observer: type == "pod" and ("istio.io/rev" in annotations or labels["istio"] == "pilot" or name matches "istio.*")
13+
# config:
14+
# default:
15+
# config:
16+
# scrape_configs:
17+
# - job_name: 'istio'
18+
# metrics_path: '`"prometheus.io/path" in annotations ? annotations["prometheus.io/path"] : "/metrics"`'
19+
# scrape_interval: 10s
20+
# static_configs:
21+
# - targets: ['`endpoint`:`"prometheus.io/port" in annotations ? annotations["prometheus.io/port"] : 15090`']
22+
# metric_relabel_configs:
23+
# - source_labels: [__name__]
24+
# action: keep
25+
# regex: "(envoy_cluster_lb_healthy_panic|\
26+
# envoy_cluster_manager_warming_clusters|\
27+
# envoy_cluster_membership_healthy|\
28+
# envoy_cluster_membership_total|\
29+
# envoy_cluster_ssl_handshake|\
30+
# envoy_cluster_ssl_session_reused|\
31+
# envoy_cluster_ssl_versions_TLSv1_2|\
32+
# envoy_cluster_ssl_versions_TLSv1_3|\
33+
# envoy_cluster_upstream_cx_active|\
34+
# envoy_cluster_upstream_cx_close_notify|\
35+
# envoy_cluster_upstream_cx_connect_attempts_exceeded|\
36+
# envoy_cluster_upstream_cx_connect_ms_sum|\
37+
# envoy_cluster_upstream_cx_connect_timeout|\
38+
# envoy_cluster_upstream_cx_destroy_local_with_active_rq|\
39+
# envoy_cluster_upstream_cx_http1_total|\
40+
# envoy_cluster_upstream_cx_http2_total|\
41+
# envoy_cluster_upstream_cx_idle_timeout|\
42+
# envoy_cluster_upstream_cx_max_requests|\
43+
# envoy_cluster_upstream_cx_none_healthy|\
44+
# envoy_cluster_upstream_cx_pool_overflow|\
45+
# envoy_cluster_upstream_cx_protocol_error|\
46+
# envoy_cluster_upstream_cx_total|\
47+
# envoy_cluster_upstream_rq_4xx|\
48+
# envoy_cluster_upstream_rq_5xx|\
49+
# envoy_cluster_upstream_rq_active|\
50+
# envoy_cluster_upstream_rq_cancelled|\
51+
# envoy_cluster_upstream_rq_completed|\
52+
# envoy_cluster_upstream_rq_pending_active|\
53+
# envoy_cluster_upstream_rq_retry|\
54+
# envoy_cluster_upstream_rq_retry_limit_exceeded|\
55+
# envoy_cluster_upstream_rq_timeout|\
56+
# envoy_cluster_upstream_rq_tx_reset|\
57+
# envoy_cluster_upstream_rq_time|\
58+
# envoy_cluster_upstream_rq_xx|\
59+
# envoy_listener_downstream_cx_total|\
60+
# envoy_listener_ssl_versions_TLSv1_2|\
61+
# envoy_listener_ssl_versions_TLSv1_3|\
62+
# envoy_server_live|\
63+
# envoy_server_memory_allocated|\
64+
# envoy_server_memory_heap_size|\
65+
# envoy_server_total_connections|\
66+
# envoy_server_uptime|\
67+
# istio_mesh_connections_from_logs|\
68+
# istio_monitor_pods_without_sidecars|\
69+
# istio_request_bytes|\
70+
# istio_request_duration_milliseconds|\
71+
# istio_request_messages_total|\
72+
# istio_requests_total|\
73+
# istio_response_messages_total|\
74+
# istio_tcp_connections_closed_total|\
75+
# istio_tcp_connections_opened_total|\
76+
# istio_tcp_received_bytes_total|\
77+
# istio_tcp_response_bytes_total|\
78+
# pilot_conflict_inbound_listener|\
79+
# pilot_eds_no_instances|\
80+
# pilot_k8s_cfg_events|\
81+
# pilot_k8s_endpoints_pending_pod|\
82+
# pilot_k8s_endpoints_with_no_pods|\
83+
# pilot_no_ip|\
84+
# pilot_proxy_convergence_time|\
85+
# pilot_proxy_queue_time|\
86+
# pilot_services|\
87+
# pilot_xds_cds_reject|\
88+
# pilot_xds_eds_reject|\
89+
# pilot_xds_expired_nonce|\
90+
# pilot_xds_lds_reject|\
91+
# pilot_xds_push_context_errors|\
92+
# pilot_xds_push_time|\
93+
# pilot_xds_rds_reject|\
94+
# pilot_xds_send_time|\
95+
# pilot_xds_write_timeout)(?:_sum|_count|_bucket)?"
96+
# status:
97+
# metrics:
98+
# - status: successful
99+
# strict: envoy_server_uptime
100+
# message: istio prometheus receiver is working for istio-proxy!
101+
# - status: successful
102+
# strict: pilot_services
103+
# message: istio prometheus receiver is working for istiod!
104+
# statements:
105+
# - status: failed
106+
# regexp: "connection refused"
107+
# message: The container is not serving http connections.
108+
# - status: failed
109+
# regexp: "dial tcp: lookup"
110+
# message: Unable to resolve istio prometheus tcp endpoint
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#####################################################################################
2+
# Do not edit manually! #
3+
# All changes must be made to associated .tmpl file before running 'make bundle.d'. #
4+
#####################################################################################
5+
prometheus/istio:
6+
enabled: false
7+
rule:
8+
k8s_observer: type == "pod" and ("istio.io/rev" in annotations or labels["istio"] == "pilot" or name matches "istio.*")
9+
config:
10+
default:
11+
config:
12+
scrape_configs:
13+
- job_name: 'istio'
14+
metrics_path: '`"prometheus.io/path" in annotations ? annotations["prometheus.io/path"] : "/metrics"`'
15+
scrape_interval: 10s
16+
static_configs:
17+
- targets: ['`endpoint`:`"prometheus.io/port" in annotations ? annotations["prometheus.io/port"] : 15090`']
18+
metric_relabel_configs:
19+
- source_labels: [__name__]
20+
action: keep
21+
regex: "(envoy_cluster_lb_healthy_panic|\
22+
envoy_cluster_manager_warming_clusters|\
23+
envoy_cluster_membership_healthy|\
24+
envoy_cluster_membership_total|\
25+
envoy_cluster_ssl_handshake|\
26+
envoy_cluster_ssl_session_reused|\
27+
envoy_cluster_ssl_versions_TLSv1_2|\
28+
envoy_cluster_ssl_versions_TLSv1_3|\
29+
envoy_cluster_upstream_cx_active|\
30+
envoy_cluster_upstream_cx_close_notify|\
31+
envoy_cluster_upstream_cx_connect_attempts_exceeded|\
32+
envoy_cluster_upstream_cx_connect_ms_sum|\
33+
envoy_cluster_upstream_cx_connect_timeout|\
34+
envoy_cluster_upstream_cx_destroy_local_with_active_rq|\
35+
envoy_cluster_upstream_cx_http1_total|\
36+
envoy_cluster_upstream_cx_http2_total|\
37+
envoy_cluster_upstream_cx_idle_timeout|\
38+
envoy_cluster_upstream_cx_max_requests|\
39+
envoy_cluster_upstream_cx_none_healthy|\
40+
envoy_cluster_upstream_cx_pool_overflow|\
41+
envoy_cluster_upstream_cx_protocol_error|\
42+
envoy_cluster_upstream_cx_total|\
43+
envoy_cluster_upstream_rq_4xx|\
44+
envoy_cluster_upstream_rq_5xx|\
45+
envoy_cluster_upstream_rq_active|\
46+
envoy_cluster_upstream_rq_cancelled|\
47+
envoy_cluster_upstream_rq_completed|\
48+
envoy_cluster_upstream_rq_pending_active|\
49+
envoy_cluster_upstream_rq_retry|\
50+
envoy_cluster_upstream_rq_retry_limit_exceeded|\
51+
envoy_cluster_upstream_rq_timeout|\
52+
envoy_cluster_upstream_rq_tx_reset|\
53+
envoy_cluster_upstream_rq_time|\
54+
envoy_cluster_upstream_rq_xx|\
55+
envoy_listener_downstream_cx_total|\
56+
envoy_listener_ssl_versions_TLSv1_2|\
57+
envoy_listener_ssl_versions_TLSv1_3|\
58+
envoy_server_live|\
59+
envoy_server_memory_allocated|\
60+
envoy_server_memory_heap_size|\
61+
envoy_server_total_connections|\
62+
envoy_server_uptime|\
63+
istio_mesh_connections_from_logs|\
64+
istio_monitor_pods_without_sidecars|\
65+
istio_request_bytes|\
66+
istio_request_duration_milliseconds|\
67+
istio_request_messages_total|\
68+
istio_requests_total|\
69+
istio_response_messages_total|\
70+
istio_tcp_connections_closed_total|\
71+
istio_tcp_connections_opened_total|\
72+
istio_tcp_received_bytes_total|\
73+
istio_tcp_response_bytes_total|\
74+
pilot_conflict_inbound_listener|\
75+
pilot_eds_no_instances|\
76+
pilot_k8s_cfg_events|\
77+
pilot_k8s_endpoints_pending_pod|\
78+
pilot_k8s_endpoints_with_no_pods|\
79+
pilot_no_ip|\
80+
pilot_proxy_convergence_time|\
81+
pilot_proxy_queue_time|\
82+
pilot_services|\
83+
pilot_xds_cds_reject|\
84+
pilot_xds_eds_reject|\
85+
pilot_xds_expired_nonce|\
86+
pilot_xds_lds_reject|\
87+
pilot_xds_push_context_errors|\
88+
pilot_xds_push_time|\
89+
pilot_xds_rds_reject|\
90+
pilot_xds_send_time|\
91+
pilot_xds_write_timeout)(?:_sum|_count|_bucket)?"
92+
status:
93+
metrics:
94+
- status: successful
95+
strict: envoy_server_uptime
96+
message: istio prometheus receiver is working for istio-proxy!
97+
- status: successful
98+
strict: pilot_services
99+
message: istio prometheus receiver is working for istiod!
100+
statements:
101+
- status: failed
102+
regexp: "connection refused"
103+
message: The container is not serving http connections.
104+
- status: failed
105+
regexp: "dial tcp: lookup"
106+
message: Unable to resolve istio prometheus tcp endpoint

0 commit comments

Comments (0)