Skip to content

Commit f7a3046

Browse files
Merge pull request #22658 from giuseppe/libpod-wait-for-healthy-on-main-thread
libpod: wait for healthy on main thread
2 parents d1dd720 + 35375e0 commit f7a3046

File tree

5 files changed

+62
-19
lines changed

5 files changed

+62
-19
lines changed

libpod/container_api.go

+20-2
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,10 @@ func (c *Container) Start(ctx context.Context, recursive bool) (finalErr error)
115115
}
116116

117117
// Start the container
118-
return c.start(ctx)
118+
if err := c.start(); err != nil {
119+
return err
120+
}
121+
return c.waitForHealthy(ctx)
119122
}
120123

121124
// Update updates the given container.
@@ -194,6 +197,9 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
194197
opts.Start = true
195198
opts.Started = startedChan
196199

200+
// attach and start the container on a different thread. waitForHealthy must
201+
// be done later, as it requires to run on the same thread that holds the lock
202+
// for the container.
197203
if err := c.ociRuntime.Attach(c, opts); err != nil {
198204
attachChan <- err
199205
}
@@ -207,7 +213,7 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
207213
c.newContainerEvent(events.Attach)
208214
}
209215

210-
return attachChan, nil
216+
return attachChan, c.waitForHealthy(ctx)
211217
}
212218

213219
// RestartWithTimeout restarts a running container and takes a given timeout in uint
@@ -775,6 +781,14 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
775781
}
776782
}
777783
if len(wantedHealthStates) > 0 {
784+
// even if we are interested only in the health check
785+
// check that the container is still running to avoid
786+
// waiting until the timeout expires.
787+
state, err := c.State()
788+
if err != nil {
789+
trySend(-1, err)
790+
return
791+
}
778792
status, err := c.HealthCheckStatus()
779793
if err != nil {
780794
trySend(-1, err)
@@ -784,6 +798,10 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
784798
trySend(-1, nil)
785799
return
786800
}
801+
if state != define.ContainerStateCreated && state != define.ContainerStateRunning && state != define.ContainerStatePaused {
802+
trySend(-1, define.ErrCtrStopped)
803+
return
804+
}
787805
}
788806
select {
789807
case <-ctx.Done():

libpod/container_internal.go

+23-9
Original file line numberDiff line numberDiff line change
@@ -330,10 +330,10 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err
330330
return false, err
331331
}
332332
}
333-
if err := c.start(ctx); err != nil {
333+
if err := c.start(); err != nil {
334334
return false, err
335335
}
336-
return true, nil
336+
return true, c.waitForHealthy(ctx)
337337
}
338338

339339
// Ensure that the container is in a specific state or state.
@@ -1208,7 +1208,7 @@ func (c *Container) reinit(ctx context.Context, retainRetries bool) error {
12081208

12091209
// Initialize (if necessary) and start a container
12101210
// Performs all necessary steps to start a container that is not running
1211-
// Does not lock or check validity
1211+
// Does not lock or check validity, requires to run on the same thread that holds the lock for the container.
12121212
func (c *Container) initAndStart(ctx context.Context) (retErr error) {
12131213
// If we are ContainerStateUnknown, throw an error
12141214
if c.state.State == define.ContainerStateUnknown {
@@ -1253,11 +1253,14 @@ func (c *Container) initAndStart(ctx context.Context) (retErr error) {
12531253
}
12541254

12551255
// Now start the container
1256-
return c.start(ctx)
1256+
if err := c.start(); err != nil {
1257+
return err
1258+
}
1259+
return c.waitForHealthy(ctx)
12571260
}
12581261

12591262
// Internal, non-locking function to start a container
1260-
func (c *Container) start(ctx context.Context) error {
1263+
func (c *Container) start() error {
12611264
if c.config.Spec.Process != nil {
12621265
logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args)
12631266
}
@@ -1298,10 +1301,14 @@ func (c *Container) start(ctx context.Context) error {
12981301

12991302
c.newContainerEvent(events.Start)
13001303

1301-
if err := c.save(); err != nil {
1302-
return err
1303-
}
1304+
return c.save()
1305+
}
13041306

1307+
// waitForHealthy, when sdNotifyMode == SdNotifyModeHealthy, waits up to the DefaultWaitInterval
1308+
// for the container to get into the healthy state and reports the status to the notify socket.
1309+
// The function unlocks the container lock, so it must be called from the same thread that locks
1310+
// the container.
1311+
func (c *Container) waitForHealthy(ctx context.Context) error {
13051312
if c.config.SdNotifyMode != define.SdNotifyModeHealthy {
13061313
return nil
13071314
}
@@ -1315,6 +1322,9 @@ func (c *Container) start(ctx context.Context) error {
13151322
}
13161323

13171324
if _, err := c.WaitForConditionWithInterval(ctx, DefaultWaitInterval, define.HealthCheckHealthy); err != nil {
1325+
if errors.Is(err, define.ErrNoSuchCtr) {
1326+
return nil
1327+
}
13181328
return err
13191329
}
13201330

@@ -1566,6 +1576,7 @@ func (c *Container) unpause() error {
15661576
}
15671577

15681578
// Internal, non-locking function to restart a container
1579+
// It requires to run on the same thread that holds the lock.
15691580
func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retErr error) {
15701581
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopped, define.ContainerStateExited) {
15711582
return fmt.Errorf("unable to restart a container in a paused or unknown state: %w", define.ErrCtrStateInvalid)
@@ -1613,7 +1624,10 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr
16131624
return err
16141625
}
16151626
}
1616-
return c.start(ctx)
1627+
if err := c.start(); err != nil {
1628+
return err
1629+
}
1630+
return c.waitForHealthy(ctx)
16171631
}
16181632

16191633
// mountStorage sets up the container's root filesystem

libpod/oci_conmon_attach_common.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
package libpod
44

55
import (
6-
"context"
76
"errors"
87
"fmt"
98
"io"
@@ -32,6 +31,7 @@ const (
3231
// Attach to the given container.
3332
// Does not check if state is appropriate.
3433
// started is only required if startContainer is true.
34+
// It does not wait for the container to be healthy, it is the caller responsibility to do so.
3535
func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
3636
passthrough := c.LogDriver() == define.PassthroughLogging || c.LogDriver() == define.PassthroughTTYLogging
3737

@@ -86,7 +86,7 @@ func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
8686
// If starting was requested, start the container and notify when that's
8787
// done.
8888
if params.Start {
89-
if err := c.start(context.TODO()); err != nil {
89+
if err := c.start(); err != nil {
9090
return err
9191
}
9292
params.Started <- true

pkg/domain/infra/abi/containers.go

+8-6
Original file line numberDiff line numberDiff line change
@@ -995,7 +995,10 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
995995
return reports, fmt.Errorf("unable to start container %s: %w", ctr.ID(), err)
996996
}
997997

998-
exitCode = ic.GetContainerExitCode(ctx, ctr.Container)
998+
exitCode, err2 := ic.ContainerWaitForExitCode(ctx, ctr.Container)
999+
if err2 != nil {
1000+
logrus.Errorf("Waiting for container %s: %v", ctr.ID(), err2)
1001+
}
9991002
reports = append(reports, &entities.ContainerStartReport{
10001003
Id: ctr.ID(),
10011004
RawInput: ctr.rawInput,
@@ -1189,7 +1192,7 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta
11891192
report.ExitCode = define.ExitCode(err)
11901193
return &report, err
11911194
}
1192-
report.ExitCode = ic.GetContainerExitCode(ctx, ctr)
1195+
report.ExitCode, _ = ic.ContainerWaitForExitCode(ctx, ctr)
11931196
if opts.Rm && !ctr.ShouldRestart(ctx) {
11941197
if err := removeContainer(ctr, false); err != nil {
11951198
if errors.Is(err, define.ErrNoSuchCtr) ||
@@ -1203,14 +1206,13 @@ func (ic *ContainerEngine) ContainerRun(ctx context.Context, opts entities.Conta
12031206
return &report, nil
12041207
}
12051208

1206-
func (ic *ContainerEngine) GetContainerExitCode(ctx context.Context, ctr *libpod.Container) int {
1209+
func (ic *ContainerEngine) ContainerWaitForExitCode(ctx context.Context, ctr *libpod.Container) (int, error) {
12071210
exitCode, err := ctr.Wait(ctx)
12081211
if err != nil {
1209-
logrus.Errorf("Waiting for container %s: %v", ctr.ID(), err)
12101212
intExitCode := int(define.ExecErrorCodeNotFound)
1211-
return intExitCode
1213+
return intExitCode, err
12121214
}
1213-
return int(exitCode)
1215+
return int(exitCode), nil
12141216
}
12151217

12161218
func (ic *ContainerEngine) ContainerLogs(ctx context.Context, namesOrIds []string, options entities.ContainerLogsOptions) error {

test/system/260-sdnotify.bats

+9
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,15 @@ READY=1" "Container log after healthcheck run"
233233
is "$output" "running" "make sure container is still running"
234234

235235
run_podman rm -f -t0 $ctr
236+
237+
ctr=$(random_string)
238+
run_podman run --name $ctr \
239+
--health-cmd="touch /terminate" \
240+
--sdnotify=healthy \
241+
$IMAGE sh -c 'while test \! -e /terminate; do sleep 0.1; done; echo finished'
242+
is "$output" "finished" "make sure container exited successfully"
243+
run_podman rm -f -t0 $ctr
244+
236245
_stop_socat
237246
}
238247

0 commit comments

Comments
 (0)