Skip to content

Commit abf0350

Browse files
Merge pull request #23206 from Luap99/rootless-reexec-userns
pkg/rootless: simplify reexec for container code
2 parents 464a799 + a2c83cb commit abf0350

File tree

7 files changed

+34
-143
lines changed

7 files changed

+34
-143
lines changed

pkg/domain/infra/abi/system_linux.go

+11-10
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,16 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
3030
}
3131
}
3232

33-
configureCgroup := cgroupMode != "disabled"
34-
if configureCgroup {
33+
hasCapSysAdmin, err := unshare.HasCapSysAdmin()
34+
if err != nil {
35+
return err
36+
}
37+
38+
// check for both euid == 0 and CAP_SYS_ADMIN because we may be running in a container with CAP_SYS_ADMIN set.
39+
if os.Geteuid() == 0 && hasCapSysAdmin {
3540
// do it only after podman has already re-execed and running with uid==0.
36-
hasCapSysAdmin, err := unshare.HasCapSysAdmin()
37-
if err != nil {
38-
return err
39-
}
40-
// check for both euid == 0 and CAP_SYS_ADMIN because we may be running in a container with CAP_SYS_ADMIN set.
41-
if os.Geteuid() == 0 && hasCapSysAdmin {
41+
configureCgroup := cgroupMode != "disabled"
42+
if configureCgroup {
4243
ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup()
4344
if err != nil {
4445
logrus.Infof("Failed to detect the owner for the current cgroup: %v", err)
@@ -55,8 +56,8 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
5556
}
5657
}
5758
}
58-
return nil
5959
}
60+
return nil
6061
}
6162

6263
pausePidPath, err := util.GetRootlessPauseProcessPidPath()
@@ -88,7 +89,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
8889
}
8990

9091
if len(paths) > 0 {
91-
became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, true, paths)
92+
became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, paths)
9293
} else {
9394
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
9495
if err == nil {

pkg/rootless/rootless.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ func TryJoinPauseProcess(pausePidPath string) (bool, int, error) {
2424
return false, -1, err
2525
}
2626

27-
became, ret, err := TryJoinFromFilePaths("", false, []string{pausePidPath})
27+
became, ret, err := TryJoinFromFilePaths("", []string{pausePidPath})
2828
if err == nil {
2929
return became, ret, nil
3030
}
@@ -45,7 +45,7 @@ func TryJoinPauseProcess(pausePidPath string) (bool, int, error) {
4545
}()
4646

4747
// Now the pause PID file is locked. Try to join once again in case it changed while it was not locked.
48-
became, ret, err = TryJoinFromFilePaths("", false, []string{pausePidPath})
48+
became, ret, err = TryJoinFromFilePaths("", []string{pausePidPath})
4949
if err != nil {
5050
// It is still failing. We can safely remove it.
5151
os.Remove(pausePidPath)

pkg/rootless/rootless_freebsd.go

+1-5
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,7 @@ func GetRootlessGID() int {
3838
// This is useful when there are already running containers and we
3939
// don't have a pause process yet. We can use the paths to the conmon
4040
// processes to attempt joining their namespaces.
41-
// If needNewNamespace is set, the file is read from a temporary user
42-
// namespace, this is useful for containers that are running with a
43-
// different uidmap and the unprivileged user has no way to read the
44-
// file owned by the root in the container.
45-
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
41+
func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) {
4642
return false, -1, errors.New("this function is not supported on this os")
4743
}
4844

pkg/rootless/rootless_linux.c

+1-49
Original file line numberDiff line numberDiff line change
@@ -947,49 +947,8 @@ check_proc_sys_userns_file (const char *path)
947947
}
948948
}
949949

950-
static int
951-
copy_file_to_fd (const char *file_to_read, int outfd)
952-
{
953-
char buf[512];
954-
cleanup_close int fd = -1;
955-
956-
fd = open (file_to_read, O_RDONLY);
957-
if (fd < 0)
958-
{
959-
fprintf (stderr, "open `%s`: %m\n", file_to_read);
960-
return fd;
961-
}
962-
963-
for (;;)
964-
{
965-
ssize_t r, w, t = 0;
966-
967-
r = TEMP_FAILURE_RETRY (read (fd, buf, sizeof buf));
968-
if (r < 0)
969-
{
970-
fprintf (stderr, "read from `%s`: %m\n", file_to_read);
971-
return r;
972-
}
973-
974-
if (r == 0)
975-
break;
976-
977-
while (t < r)
978-
{
979-
w = TEMP_FAILURE_RETRY (write (outfd, &buf[t], r - t));
980-
if (w < 0)
981-
{
982-
fprintf (stderr, "write file to output fd `%s`: %m\n", file_to_read);
983-
return w;
984-
}
985-
t += w;
986-
}
987-
}
988-
return 0;
989-
}
990-
991950
int
992-
reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_read, int outputfd)
951+
reexec_in_user_namespace (int ready, char *pause_pid_file_path)
993952
{
994953
cleanup_free char **argv = NULL;
995954
cleanup_free char *argv0 = NULL;
@@ -1138,13 +1097,6 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_re
11381097
_exit (EXIT_FAILURE);
11391098
}
11401099

1141-
if (file_to_read && file_to_read[0])
1142-
{
1143-
ret = copy_file_to_fd (file_to_read, outputfd);
1144-
close (outputfd);
1145-
_exit (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
1146-
}
1147-
11481100
execvp ("/proc/self/exe", argv);
11491101
fprintf (stderr, "failed to reexec: %m\n");
11501102

pkg/rootless/rootless_linux.go

+14-71
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ import (
3232
#include <sys/types.h>
3333
extern uid_t rootless_uid();
3434
extern uid_t rootless_gid();
35-
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd);
35+
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path);
3636
extern int reexec_in_user_namespace_wait(int pid, int options);
3737
extern int reexec_userns_join(int pid, char *pause_pid_file_path);
3838
extern int is_fd_inherited(int fd);
@@ -213,7 +213,7 @@ func copyMappings(from, to string) error {
213213
return os.WriteFile(to, content, 0600)
214214
}
215215

216-
func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ bool, _ int, retErr error) {
216+
func becomeRootInUserNS(pausePid string) (_ bool, _ int, retErr error) {
217217
hasCapSysAdmin, err := unshare.HasCapSysAdmin()
218218
if err != nil {
219219
return false, 0, err
@@ -249,13 +249,6 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
249249
cPausePid := C.CString(pausePid)
250250
defer C.free(unsafe.Pointer(cPausePid))
251251

252-
cFileToRead := C.CString(fileToRead)
253-
defer C.free(unsafe.Pointer(cFileToRead))
254-
var fileOutputFD C.int
255-
if fileOutput != nil {
256-
fileOutputFD = C.int(fileOutput.Fd())
257-
}
258-
259252
runtime.LockOSThread()
260253
defer runtime.UnlockOSThread()
261254

@@ -287,7 +280,7 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
287280
}
288281
}()
289282

290-
pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid, cFileToRead, fileOutputFD)
283+
pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid)
291284
pid = int(pidC)
292285
if pid < 0 {
293286
return false, -1, fmt.Errorf("cannot re-exec process")
@@ -361,14 +354,6 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
361354
return false, -1, fmt.Errorf("read from sync pipe: %w", err)
362355
}
363356

364-
if fileOutput != nil {
365-
ret := C.reexec_in_user_namespace_wait(pidC, 0)
366-
if ret < 0 {
367-
return false, -1, errors.New("waiting for the re-exec process")
368-
}
369-
return true, 0, nil
370-
}
371-
372357
if b[0] == '2' {
373358
// We have lost the race for writing the PID file, as probably another
374359
// process created a namespace and wrote the PID.
@@ -434,69 +419,27 @@ func waitAndProxySignalsToChild(pid C.int) (bool, int, error) {
434419
// If podman was re-executed the caller needs to propagate the error code returned by the child
435420
// process.
436421
func BecomeRootInUserNS(pausePid string) (bool, int, error) {
437-
return becomeRootInUserNS(pausePid, "", nil)
422+
return becomeRootInUserNS(pausePid)
438423
}
439424

440425
// TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths.
441426
// This is useful when there are already running containers and we
442427
// don't have a pause process yet. We can use the paths to the conmon
443428
// processes to attempt joining their namespaces.
444-
// If needNewNamespace is set, the file is read from a temporary user
445-
// namespace, this is useful for containers that are running with a
446-
// different uidmap and the unprivileged user has no way to read the
447-
// file owned by the root in the container.
448-
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
429+
func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) {
449430
var lastErr error
450-
var pausePid int
451431

452432
for _, path := range paths {
453-
if !needNewNamespace {
454-
data, err := os.ReadFile(path)
455-
if err != nil {
456-
lastErr = err
457-
continue
458-
}
459-
460-
pausePid, err = strconv.Atoi(string(data))
461-
if err != nil {
462-
lastErr = fmt.Errorf("cannot parse file %q: %w", path, err)
463-
continue
464-
}
465-
} else {
466-
r, w, err := os.Pipe()
467-
if err != nil {
468-
lastErr = err
469-
continue
470-
}
471-
472-
defer errorhandling.CloseQuiet(r)
473-
474-
if _, _, err := becomeRootInUserNS("", path, w); err != nil {
475-
w.Close()
476-
lastErr = err
477-
continue
478-
}
479-
480-
if err := w.Close(); err != nil {
481-
return false, 0, err
482-
}
483-
defer func() {
484-
C.reexec_in_user_namespace_wait(-1, 0)
485-
}()
486-
487-
b := make([]byte, 32)
488-
489-
n, err := r.Read(b)
490-
if err != nil {
491-
lastErr = fmt.Errorf("cannot read %q: %w", path, err)
492-
continue
493-
}
433+
data, err := os.ReadFile(path)
434+
if err != nil {
435+
lastErr = err
436+
continue
437+
}
494438

495-
pausePid, err = strconv.Atoi(string(b[:n]))
496-
if err != nil {
497-
lastErr = err
498-
continue
499-
}
439+
pausePid, err := strconv.Atoi(string(data))
440+
if err != nil {
441+
lastErr = fmt.Errorf("cannot parse file %q: %w", path, err)
442+
continue
500443
}
501444

502445
if pausePid > 0 && unix.Kill(pausePid, 0) == nil {

pkg/rootless/rootless_unsupported.go

+1-5
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,7 @@ func GetRootlessGID() int {
4141
// This is useful when there are already running containers and we
4242
// don't have a pause process yet. We can use the paths to the conmon
4343
// processes to attempt joining their namespaces.
44-
// If needNewNamespace is set, the file is read from a temporary user
45-
// namespace, this is useful for containers that are running with a
46-
// different uidmap and the unprivileged user has no way to read the
47-
// file owned by the root in the container.
48-
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
44+
func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) {
4945
return false, -1, errors.New("this function is not supported on this os")
5046
}
5147

test/system/550-pause-process.bats

+4-1
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ function _check_pause_process() {
119119

120120
# First let's run a container in the background to keep the userns active
121121
local cname1=c1_$(random_string)
122-
run_podman run -d --name $cname1 $IMAGE top
122+
run_podman run -d --name $cname1 --uidmap 0:100:100 $IMAGE top
123123

124124
run_podman unshare readlink /proc/self/ns/user
125125
userns="$output"
@@ -136,6 +136,9 @@ function _check_pause_process() {
136136

137137
_test_sigproxy $cname2 $kidpid
138138

139+
# check pause process again
140+
_check_pause_process
141+
139142
# our container exits 0 so podman should too
140143
wait $kidpid || die "podman run exited $? instead of zero"
141144

0 commit comments

Comments
 (0)