From d6c6b1c3579afc6efcbea33fa66a250064e239a4 Mon Sep 17 00:00:00 2001 From: braydonk Date: Wed, 29 May 2024 16:26:28 +0000 Subject: [PATCH 01/10] hostmetrics: add WMI querying ppid featureflag Scraping metrics on Windows largely has one truly expensive operation: getting the Parent Process ID. gopsutil does this with the [CreateToolhelp32Snapshot](https://learn.microsoft.com/en-us/windows/win32/api/tlhelp32/nf-tlhelp32-createtoolhelp32snapshot) Win32 API call. This takes a snapshot of a large amount of process details and ends up being very expensive. This is causing CPU performance issues for Windows users. This PR does the following in an attempt to address this perforamnce issue: * Repurpose the handlecount package to be for querying any WMI process data and add ParentProcessID to that querying process * Add the featureflag `hostmetrics.process.wmiParentProcessID` which will enable using the WMI query for parent process ID fetching * Whether the featureflag is on or off, this PR also adds a guard around fetching the parent process ID that will only check it if the resource attribute for ppid is enabled. This means users in very resource constrained environments can opt out of that fetching either way --- .../braydonk_hostmetrics_wmi_parent_pid.yaml | 27 +++++ .../scraper/processscraper/factory.go | 9 ++ .../internal/handlecount/handles.go | 9 -- .../internal/handlecount/handles_windows.go | 76 ------------- .../internal/wmiprocinfo/manager.go | 14 +++ .../manager_others.go} | 14 +-- .../manager_others_test.go} | 6 +- .../internal/wmiprocinfo/manager_windows.go | 101 ++++++++++++++++++ .../manager_windows_test.go} | 26 +++-- .../package_test.go | 2 +- .../scraper/processscraper/process_scraper.go | 57 +++++++--- 11 files changed, 220 insertions(+), 121 deletions(-) create mode 100644 .chloggen/braydonk_hostmetrics_wmi_parent_pid.yaml delete mode 100644 receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles.go delete mode 100644 receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles_windows.go create mode 100644 receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager.go rename receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/{handlecount/handles_others.go => wmiprocinfo/manager_others.go} (56%) rename receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/{handlecount/handles_others_test.go => wmiprocinfo/manager_others_test.go} (52%) create mode 100644 receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_windows.go rename receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/{handlecount/handles_windows_test.go => wmiprocinfo/manager_windows_test.go} (50%) rename receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/{handlecount => wmiprocinfo}/package_test.go (51%) diff --git a/.chloggen/braydonk_hostmetrics_wmi_parent_pid.yaml b/.chloggen/braydonk_hostmetrics_wmi_parent_pid.yaml new file mode 100644 index 0000000000000..10d841b52b77c --- /dev/null +++ b/.chloggen/braydonk_hostmetrics_wmi_parent_pid.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: hostmetricsreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Added a new featuregate hostmetrics.process.wmiParentProcessID that will use WMI to fetch parent process IDs instead of the Win32 API. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [32947] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: This also made a change where the parent process ID will not be fetched if the resource attribute is disabled. + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/factory.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/factory.go index 941a39e572140..4db0b209c34c8 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/factory.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/factory.go @@ -33,6 +33,15 @@ var ( featuregate.WithRegisterReferenceURL("https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/28849"), featuregate.WithRegisterFromVersion("v0.98.0"), ) + + wmiParentProcessIDFeaturegateID = "hostmetrics.process.wmiParentProcessID" + wmiParentProcessIDFeaturegate = featuregate.GlobalRegistry().MustRegister( + wmiParentProcessIDFeaturegateID, + featuregate.StageAlpha, + featuregate.WithRegisterDescription("When enabled, on Windows the ParentProcessID will be fetched through a WMI query instead of a process snapshot."), + featuregate.WithRegisterReferenceURL("https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/32947"), + featuregate.WithRegisterFromVersion("v0.101.0"), + ) ) // Factory is the Factory for scraper. diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles.go deleted file mode 100644 index 34d582f7b8f34..0000000000000 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles.go +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package handlecount // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount" - -type Manager interface { - Refresh() error - GetProcessHandleCount(pid int64) (uint32, error) -} diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles_windows.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles_windows.go deleted file mode 100644 index 2025d60597070..0000000000000 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles_windows.go +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//go:build windows - -package handlecount // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount" - -import ( - "errors" - "fmt" - - "github.com/yusufpapurcu/wmi" -) - -func NewManager() Manager { - return &handleCountManager{queryer: wmiHandleCountQueryer{}} -} - -var ( - ErrNoHandleCounts = errors.New("no handle counts are currently registered") - ErrNoHandleCountForProcess = errors.New("no handle count for process") -) - -type handleCountQueryer interface { - queryProcessHandleCounts() (map[int64]uint32, error) -} - -type handleCountManager struct { - queryer handleCountQueryer - handleCounts map[int64]uint32 -} - -func (m *handleCountManager) Refresh() error { - handleCounts, err := m.queryer.queryProcessHandleCounts() - if err != nil { - return err - } - m.handleCounts = handleCounts - return nil -} - -func (m *handleCountManager) GetProcessHandleCount(pid int64) (uint32, error) { - if len(m.handleCounts) == 0 { - return 0, ErrNoHandleCounts - } - handleCount, ok := m.handleCounts[pid] - if !ok { - return 0, fmt.Errorf("%w %d", ErrNoHandleCountForProcess, pid) - } - return handleCount, nil -} - -type wmiHandleCountQueryer struct{} - -//revive:disable-next-line:var-naming -type Win32_Process struct { - ProcessID int64 - HandleCount uint32 -} - -func (wmiHandleCountQueryer) queryProcessHandleCounts() (map[int64]uint32, error) { - handleCounts := []Win32_Process{} - // Creates query `get-wmiobject -query "select ProcessId, HandleCount from Win32_Process"` - // based on reflection of Win32_Process type. - q := wmi.CreateQuery(&handleCounts, "") - err := wmi.Query(q, &handleCounts) - if err != nil { - return nil, err - } - - newHandleCounts := make(map[int64]uint32, len(handleCounts)) - for _, p := range handleCounts { - newHandleCounts[p.ProcessID] = p.HandleCount - } - return newHandleCounts, nil -} diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager.go new file mode 100644 index 0000000000000..898ddaf2e1672 --- /dev/null +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager.go @@ -0,0 +1,14 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package wmiprocinfo // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo" + +import "errors" + +var ErrPlatformSupport = errors.New("wmi process info collection is only supported on Windows") + +type Manager interface { + Refresh() error + GetProcessHandleCount(pid int64) (uint32, error) + GetProcessPpid(pid int64) (int64, error) +} diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles_others.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_others.go similarity index 56% rename from receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles_others.go rename to receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_others.go index c068177f5b3ce..f1fd009b4f85c 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles_others.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_others.go @@ -3,11 +3,7 @@ //go:build !windows -package handlecount // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount" - -import "errors" - -var ErrHandlesPlatformSupport = errors.New("process handle collection is only supported on Windows") +package wmiprocinfo // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo" func NewManager() Manager { return &unsupportedManager{} @@ -16,9 +12,13 @@ func NewManager() Manager { type unsupportedManager struct{} func (m *unsupportedManager) Refresh() error { - return ErrHandlesPlatformSupport + return ErrPlatformSupport } func (m *unsupportedManager) GetProcessHandleCount(_ int64) (uint32, error) { - return 0, ErrHandlesPlatformSupport + return 0, ErrPlatformSupport +} + +func (m *unsupportedManager) GetProcessPpid(pid int64) (int64, error) { + return 0, ErrPlatformSupport } diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles_others_test.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_others_test.go similarity index 52% rename from receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles_others_test.go rename to receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_others_test.go index 027de2cd0266a..1305da87a4ccf 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles_others_test.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_others_test.go @@ -3,7 +3,7 @@ //go:build !windows -package handlecount +package wmiprocinfo // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo" import ( "testing" @@ -15,8 +15,8 @@ func TestUnsupportedManager(t *testing.T) { m := NewManager() err := m.Refresh() - assert.ErrorIs(t, err, ErrHandlesPlatformSupport) + assert.ErrorIs(t, err, ErrPlatformSupport) _, err = m.GetProcessHandleCount(0) - assert.ErrorIs(t, err, ErrHandlesPlatformSupport) + assert.ErrorIs(t, err, ErrPlatformSupport) } diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_windows.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_windows.go new file mode 100644 index 0000000000000..87c46cec1d2a6 --- /dev/null +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_windows.go @@ -0,0 +1,101 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//go:build windows + +package wmiprocinfo // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo" + +import ( + "errors" + "fmt" + + "github.com/yusufpapurcu/wmi" +) + +func NewManager() Manager { + return &wmiProcInfoManager{queryer: wmiProcessQueryer{}} +} + +var ( + ErrNoProcesses = errors.New("no process info is currently registered") + ErrProcessNotFound = errors.New("process info not found") +) + +type wmiProcInfo struct { + handleCount uint32 + ppid int64 +} + +type wmiQueryer interface { + wmiProcessQuery() (map[int64]*wmiProcInfo, error) +} + +type wmiProcInfoManager struct { + queryer wmiQueryer + processes map[int64]*wmiProcInfo +} + +func (m *wmiProcInfoManager) Refresh() error { + processes, err := m.queryer.wmiProcessQuery() + if err != nil { + return err + } + m.processes = processes + return nil +} + +func (m *wmiProcInfoManager) getProcessInfo(pid int64) (*wmiProcInfo, error) { + if len(m.processes) == 0 { + return nil, ErrNoProcesses + } + procInfo, ok := m.processes[pid] + if !ok { + return nil, fmt.Errorf("%w for %d", ErrProcessNotFound, pid) + } + return procInfo, nil +} + +func (m *wmiProcInfoManager) GetProcessHandleCount(pid int64) (uint32, error) { + procInfo, err := m.getProcessInfo(pid) + if err != nil { + return 0, err + } + return procInfo.handleCount, nil +} + +func (m *wmiProcInfoManager) GetProcessPpid(pid int64) (int64, error) { + procInfo, err := m.getProcessInfo(pid) + if err != nil { + return 0, err + } + return procInfo.ppid, nil +} + +type wmiProcessQueryer struct{} + +//revive:disable-next-line:var-naming +type Win32_Process struct { + ProcessID int64 + ParentProcessID int64 + HandleCount uint32 +} + +func (wmiProcessQueryer) wmiProcessQuery() (map[int64]*wmiProcInfo, error) { + processes := []Win32_Process{} + // Based on reflection of Win32_Process type, this creates the following query: + // `get-wmiobject -query "select ProcessId, ParentProcessId, HandleCount from Win32_Process"` + q := wmi.CreateQuery(&processes, "") + err := wmi.Query(q, &processes) + if err != nil { + return nil, err + } + + procInfos := make(map[int64]*wmiProcInfo, len(processes)) + for _, p := range processes { + procInfos[p.ProcessID] = &wmiProcInfo{ + handleCount: p.HandleCount, + ppid: p.ParentProcessID, + } + } + return procInfos, nil +} diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles_windows_test.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_windows_test.go similarity index 50% rename from receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles_windows_test.go rename to receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_windows_test.go index 9ce32bf51e519..2ae1eebbdb8c2 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/handles_windows_test.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_windows_test.go @@ -3,7 +3,7 @@ //go:build windows -package handlecount +package wmiprocinfo // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo" import ( "errors" @@ -15,9 +15,9 @@ import ( ) func TestHandleCountManager(t *testing.T) { - testInfos := map[int64]uint32{ - 1: 3, - 2: 5, + testInfos := map[int64]*wmiProcInfo{ + 1: {handleCount: 3, ppid: 10}, + 2: {handleCount: 5, ppid: 20}, } m := deterministicManagerWithInfo(testInfos) @@ -27,25 +27,33 @@ func TestHandleCountManager(t *testing.T) { assert.NoError(t, err) assert.Equal(t, uint32(3), count) + ppid, err := m.GetProcessPpid(1) + assert.NoError(t, err) + assert.Equal(t, ppid, int64(10)) + count, err = m.GetProcessHandleCount(2) assert.NoError(t, err) assert.Equal(t, uint32(5), count) + ppid, err = m.GetProcessPpid(2) + assert.NoError(t, err) + assert.Equal(t, ppid, int64(20)) + _, err = m.GetProcessHandleCount(3) - assert.ErrorIs(t, errors.Unwrap(err), ErrNoHandleCountForProcess) + assert.ErrorIs(t, errors.Unwrap(err), ErrProcessNotFound) assert.True(t, strings.Contains(err.Error(), "3")) } type mockQueryer struct { - info map[int64]uint32 + info map[int64]*wmiProcInfo } -func (s mockQueryer) queryProcessHandleCounts() (map[int64]uint32, error) { +func (s mockQueryer) wmiProcessQuery() (map[int64]*wmiProcInfo, error) { return s.info, nil } -func deterministicManagerWithInfo(info map[int64]uint32) *handleCountManager { - return &handleCountManager{ +func deterministicManagerWithInfo(info map[int64]*wmiProcInfo) *wmiProcInfoManager { + return &wmiProcInfoManager{ queryer: mockQueryer{info: info}, } } diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/package_test.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/package_test.go similarity index 51% rename from receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/package_test.go rename to receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/package_test.go index 3696253e9e548..4644d762f84f5 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount/package_test.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/package_test.go @@ -1,7 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package handlecount +package wmiprocinfo // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo" import ( "testing" diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go index 3b9a4bbee701f..3478719120d91 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go @@ -21,8 +21,8 @@ import ( "go.opentelemetry.io/collector/receiver/scrapererror" "github.com/open-telemetry/opentelemetry-collector-contrib/internal/filter/filterset" - "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/handlecount" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/metadata" + "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/processscraper/ucal" ) @@ -41,6 +41,8 @@ const ( metricsLen = cpuMetricsLen + memoryMetricsLen + diskMetricsLen + memoryUtilizationMetricsLen + pagingMetricsLen + threadMetricsLen + contextSwitchMetricsLen + fileDescriptorMetricsLen + signalMetricsLen ) +type parentPidFunc func(ctx context.Context, handle processHandle, pid int32) (int32, error) + // scraper for Process Metrics type scraper struct { settings receiver.Settings @@ -56,7 +58,9 @@ type scraper struct { getProcessCreateTime func(p processHandle, ctx context.Context) (int64, error) getProcessHandles func(context.Context) (processHandles, error) - handleCountManager handlecount.Manager + getParentPid parentPidFunc + + wmiProcInfoManager wmiprocinfo.Manager } // newProcessScraper creates a Process Scraper @@ -67,12 +71,17 @@ func newProcessScraper(settings receiver.Settings, cfg *Config) (*scraper, error getProcessCreateTime: processHandle.CreateTimeWithContext, getProcessHandles: getProcessHandlesInternal, scrapeProcessDelay: cfg.ScrapeProcessDelay, + getParentPid: parentPid, ucals: make(map[int32]*ucal.CPUUtilizationCalculator), - handleCountManager: handlecount.NewManager(), + wmiProcInfoManager: wmiprocinfo.NewManager(), } var err error + if wmiParentProcessIDFeaturegate.IsEnabled() { + scraper.getParentPid = scraper.getWMIParentPidFunc() + } + if len(cfg.Include.Names) > 0 { scraper.includeFS, err = filterset.CreateFilterSet(cfg.Include.Names, &cfg.Include.Config) if err != nil { @@ -203,7 +212,7 @@ func (s *scraper) getProcessMetadata() ([]*processMetadata, error) { var errs scrapererror.ScrapeErrors - if err := s.refreshHandleCounts(); err != nil { + if err := s.refreshWMIProcInfo(); err != nil { errs.Add(err) } @@ -264,14 +273,8 @@ func (s *scraper) getProcessMetadata() ([]*processMetadata, error) { continue } - parentPid, err := parentPid(ctx, handle, pid) - if err != nil { - errs.AddPartial(0, fmt.Errorf("error reading parent pid for process %q (pid %v): %w", executable.name, pid, err)) - } - md := &processMetadata{ pid: pid, - parentPid: parentPid, executable: executable, command: command, username: username, @@ -279,6 +282,14 @@ func (s *scraper) getProcessMetadata() ([]*processMetadata, error) { createTime: createTime, } + if s.config.ResourceAttributes.ProcessParentPid.Enabled { + ppid, err := s.getParentPid(ctx, handle, pid) + if err != nil { + errs.AddPartial(0, fmt.Errorf("error reading parent pid for process %q (pid %v): %w", executable.name, pid, err)) + } + md.parentPid = ppid + } + data = append(data, md) } @@ -424,12 +435,16 @@ func (s *scraper) scrapeAndAppendOpenFileDescriptorsMetric(ctx context.Context, return nil } -func (s *scraper) refreshHandleCounts() error { - if !s.config.MetricsBuilderConfig.Metrics.ProcessHandles.Enabled { - return nil +func (s *scraper) refreshWMIProcInfo() error { + var err error + if s.config.Metrics.ProcessHandles.Enabled || + (s.config.ResourceAttributes.ProcessParentPid.Enabled && wmiParentProcessIDFeaturegate.IsEnabled()) { + err = s.wmiProcInfoManager.Refresh() + if errors.Is(err, wmiprocinfo.ErrPlatformSupport) { + return nil + } } - - return s.handleCountManager.Refresh() + return err } func (s *scraper) scrapeAndAppendHandlesMetric(_ context.Context, now pcommon.Timestamp, pid int64) error { @@ -437,7 +452,7 @@ func (s *scraper) scrapeAndAppendHandlesMetric(_ context.Context, now pcommon.Ti return nil } - count, err := s.handleCountManager.GetProcessHandleCount(pid) + count, err := s.wmiProcInfoManager.GetProcessHandleCount(pid) if err != nil { return err } @@ -466,3 +481,13 @@ func (s *scraper) scrapeAndAppendSignalsPendingMetric(ctx context.Context, now p return nil } + +func (s *scraper) getWMIParentPidFunc() parentPidFunc { + return func(ctx context.Context, handle processHandle, pid int32) (int32, error) { + ppid64, err := s.wmiProcInfoManager.GetProcessPpid(int64(pid)) + if err != nil { + return 0, err + } + return int32(ppid64), nil + } +} From 6b4db515edb2a871b183477ff50a87293724c0b0 Mon Sep 17 00:00:00 2001 From: braydonk Date: Wed, 29 May 2024 19:41:09 +0000 Subject: [PATCH 02/10] fix lint errors --- .../processscraper/internal/wmiprocinfo/manager_others.go | 2 +- .../internal/scraper/processscraper/process_scraper.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_others.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_others.go index f1fd009b4f85c..92614b9cdc95d 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_others.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/internal/wmiprocinfo/manager_others.go @@ -19,6 +19,6 @@ func (m *unsupportedManager) GetProcessHandleCount(_ int64) (uint32, error) { return 0, ErrPlatformSupport } -func (m *unsupportedManager) GetProcessPpid(pid int64) (int64, error) { +func (m *unsupportedManager) GetProcessPpid(_ int64) (int64, error) { return 0, ErrPlatformSupport } diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go index 3478719120d91..8502d27d70ec8 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go @@ -483,7 +483,7 @@ func (s *scraper) scrapeAndAppendSignalsPendingMetric(ctx context.Context, now p } func (s *scraper) getWMIParentPidFunc() parentPidFunc { - return func(ctx context.Context, handle processHandle, pid int32) (int32, error) { + return func(_ context.Context, handle processHandle, pid int32) (int32, error) { ppid64, err := s.wmiProcInfoManager.GetProcessPpid(int64(pid)) if err != nil { return 0, err From ebf6fe0c9bf35038cf68e81d03d73230b615a01e Mon Sep 17 00:00:00 2001 From: braydonk Date: Wed, 29 May 2024 20:02:18 +0000 Subject: [PATCH 03/10] fix more lint errors --- .../internal/scraper/processscraper/process_scraper.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go index 8502d27d70ec8..27753a6491da3 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go @@ -483,7 +483,7 @@ func (s *scraper) scrapeAndAppendSignalsPendingMetric(ctx context.Context, now p } func (s *scraper) getWMIParentPidFunc() parentPidFunc { - return func(_ context.Context, handle processHandle, pid int32) (int32, error) { + return func(_ context.Context, _ processHandle, pid int32) (int32, error) { ppid64, err := s.wmiProcInfoManager.GetProcessPpid(int64(pid)) if err != nil { return 0, err From 1427e9bac4cae2a81a6f640c923a9ff0266da147 Mon Sep 17 00:00:00 2001 From: braydonk Date: Sat, 21 Sep 2024 01:26:07 +0000 Subject: [PATCH 04/10] change WMI usage to a config field --- .../internal/scraper/processscraper/config.go | 5 +++ .../scraper/processscraper/factory.go | 9 ------ .../scraper/processscraper/process_scraper.go | 31 +++++++++++++++---- .../processscraper/process_scraper_test.go | 13 ++++++++ 4 files changed, 43 insertions(+), 15 deletions(-) diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/config.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/config.go index 76003bf72ce04..68fc92317fd68 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/config.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/config.go @@ -46,6 +46,11 @@ type Config struct { // ScrapeProcessDelay is used to indicate the minimum amount of time a process must be running // before metrics are scraped for it. The default value is 0 seconds (0s) ScrapeProcessDelay time.Duration `mapstructure:"scrape_process_delay"` + + // DisableWMI toggles the use of Windows Management Interface to fetch certain process information on Windows. + // WMI should only need be disabled in a scenario where the Windows Management Interface is not enabled on the system. + // This configuration has no effect on non-Windows systems. + DisableWMI bool `mapstructure:"disable_wmi"` } type MatchConfig struct { diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/factory.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/factory.go index 4db0b209c34c8..941a39e572140 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/factory.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/factory.go @@ -33,15 +33,6 @@ var ( featuregate.WithRegisterReferenceURL("https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/28849"), featuregate.WithRegisterFromVersion("v0.98.0"), ) - - wmiParentProcessIDFeaturegateID = "hostmetrics.process.wmiParentProcessID" - wmiParentProcessIDFeaturegate = featuregate.GlobalRegistry().MustRegister( - wmiParentProcessIDFeaturegateID, - featuregate.StageAlpha, - featuregate.WithRegisterDescription("When enabled, on Windows the ParentProcessID will be fetched through a WMI query instead of a process snapshot."), - featuregate.WithRegisterReferenceURL("https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/32947"), - featuregate.WithRegisterFromVersion("v0.101.0"), - ) ) // Factory is the Factory for scraper. diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go index 27753a6491da3..53c4eb986b684 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go @@ -41,6 +41,8 @@ const ( metricsLen = cpuMetricsLen + memoryMetricsLen + diskMetricsLen + memoryUtilizationMetricsLen + pagingMetricsLen + threadMetricsLen + contextSwitchMetricsLen + fileDescriptorMetricsLen + signalMetricsLen ) +var errProcessHandlesRequiresWMI = errors.New("the process.handles metric requires the use of Windows Management Interface") + type parentPidFunc func(ctx context.Context, handle processHandle, pid int32) (int32, error) // scraper for Process Metrics @@ -57,8 +59,7 @@ type scraper struct { // for mocking getProcessCreateTime func(p processHandle, ctx context.Context) (int64, error) getProcessHandles func(context.Context) (processHandles, error) - - getParentPid parentPidFunc + getParentPid parentPidFunc wmiProcInfoManager wmiprocinfo.Manager } @@ -78,8 +79,11 @@ func newProcessScraper(settings receiver.Settings, cfg *Config) (*scraper, error var err error - if wmiParentProcessIDFeaturegate.IsEnabled() { - scraper.getParentPid = scraper.getWMIParentPidFunc() + if runtime.GOOS == "windows" { + err = configureWindowsSettings(scraper) + if err != nil { + return nil, fmt.Errorf("error configuring Windows settings: %w", err) + } } if len(cfg.Include.Names) > 0 { @@ -437,8 +441,12 @@ func (s *scraper) scrapeAndAppendOpenFileDescriptorsMetric(ctx context.Context, func (s *scraper) refreshWMIProcInfo() error { var err error - if s.config.Metrics.ProcessHandles.Enabled || - (s.config.ResourceAttributes.ProcessParentPid.Enabled && wmiParentProcessIDFeaturegate.IsEnabled()) { + + if s.config.DisableWMI { + return nil + } + + if s.config.Metrics.ProcessHandles.Enabled || s.config.ResourceAttributes.ProcessParentPid.Enabled { err = s.wmiProcInfoManager.Refresh() if errors.Is(err, wmiprocinfo.ErrPlatformSupport) { return nil @@ -491,3 +499,14 @@ func (s *scraper) getWMIParentPidFunc() parentPidFunc { return int32(ppid64), nil } } + +func configureWindowsSettings(s *scraper) error { + if s.config.DisableWMI { + if s.config.Metrics.ProcessHandles.Enabled { + return errProcessHandlesRequiresWMI + } + } else { + s.getParentPid = s.getWMIParentPidFunc() + } + return nil +} diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper_test.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper_test.go index b1a232e5d7cd6..7dddb7cd24427 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper_test.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper_test.go @@ -348,6 +348,19 @@ func TestScrapeMetrics_NewError(t *testing.T) { require.Regexp(t, "^error creating process exclude filters:", err.Error()) } +func TestScrapeMetrics_NewError_Windows(t *testing.T) { + if runtime.GOOS != "windows" { + t.Skipf("skipping windows test on non-windows system %s", runtime.GOOS) + } + + // process.handles metric cannot be enabled if the config disables WMI + mb := metadata.DefaultMetricsBuilderConfig() + mb.Metrics.ProcessHandles.Enabled = true + _, err := newProcessScraper(receivertest.NewNopSettings(), &Config{DisableWMI: true, MetricsBuilderConfig: mb}) + require.Error(t, err) + require.ErrorIs(t, err, errProcessHandlesRequiresWMI) +} + func TestScrapeMetrics_GetProcessesError(t *testing.T) { skipTestOnUnsupportedOS(t) From ff7e77eadf02a81401367f40e853c01edb1404ed Mon Sep 17 00:00:00 2001 From: braydonk Date: Sat, 21 Sep 2024 01:31:37 +0000 Subject: [PATCH 05/10] adjust the changelog to reflect new changes --- .chloggen/braydonk_hostmetrics_wmi_parent_pid.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.chloggen/braydonk_hostmetrics_wmi_parent_pid.yaml b/.chloggen/braydonk_hostmetrics_wmi_parent_pid.yaml index 10d841b52b77c..8563f501d1eaa 100644 --- a/.chloggen/braydonk_hostmetrics_wmi_parent_pid.yaml +++ b/.chloggen/braydonk_hostmetrics_wmi_parent_pid.yaml @@ -7,7 +7,7 @@ change_type: enhancement component: hostmetricsreceiver # A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). -note: Added a new featuregate hostmetrics.process.wmiParentProcessID that will use WMI to fetch parent process IDs instead of the Win32 API. +note: Use Windows Management Interface to fetch process.ppid by default. Add `disable_wmi` config option to fallback to old behaviour. # Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. issues: [32947] @@ -24,4 +24,4 @@ subtext: This also made a change where the parent process ID will not be fetched # Include 'user' if the change is relevant to end users. # Include 'api' if there is a change to a library API. # Default: '[user]' -change_logs: [] +change_logs: [user] From e628ed8f9d63b9a2e251ebfbe89761e771cd385d Mon Sep 17 00:00:00 2001 From: braydonk Date: Sat, 21 Sep 2024 01:43:12 +0000 Subject: [PATCH 06/10] add documentation --- receiver/hostmetricsreceiver/README.md | 10 ++++++---- .../internal/scraper/processscraper/metadata.yaml | 6 ++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/receiver/hostmetricsreceiver/README.md b/receiver/hostmetricsreceiver/README.md index 1098fbe6f4caf..8e6c9d67c8e7e 100644 --- a/receiver/hostmetricsreceiver/README.md +++ b/receiver/hostmetricsreceiver/README.md @@ -16,7 +16,7 @@ The Host Metrics receiver generates metrics about the host system scraped -from various sources and host entity event as log. This is intended to be +from various sources and host entity event as log. This is intended to be used when the collector is deployed as an agent. ## Getting Started @@ -119,6 +119,7 @@ process: mute_process_io_error: mute_process_user_error: mute_process_cgroup_error: + disable_wmi: scrape_process_delay: