Skip to content

Commit ccc500c

Browse files
committed
seccomp: patchbpf: always include native architecture in stub
It turns out that on ppc64le (at least), Docker doesn't include any architectures in the list of allowed architectures. libseccomp interprets this as "just include the default architecture" but patchbpf would return a no-op ENOSYS stub, which would lead to the exact issues that commit 7a8d716 ("seccomp: prepend -ENOSYS stub to all filters") fixed for other architectures. So, just always include the running architecture in the list. There's no real downside. Ref: https://bugzilla.suse.com/show_bug.cgi?id=1192051#c6 Reported-by: Fabian Vogt <[email protected]> Signed-off-by: Aleksa Sarai <[email protected]>
1 parent b288abe commit ccc500c

File tree

2 files changed

+56
-8
lines changed

2 files changed

+56
-8
lines changed

libcontainer/seccomp/patchbpf/enosys_linux.go

+18-4
Original file line numberDiff line numberDiff line change
@@ -231,16 +231,30 @@ type lastSyscallMap map[linuxAuditArch]map[libseccomp.ScmpArch]libseccomp.ScmpSy
231231
// representation, but SCMP_ARCH_X32 means we have to track cases where the
232232
// same architecture has different largest syscalls based on the mode.
233233
func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) {
234-
lastSyscalls := make(lastSyscallMap)
235-
// Only loop over architectures which are present in the filter. Any other
236-
// architectures will get the libseccomp bad architecture action anyway.
234+
scmpArchs := make(map[libseccomp.ScmpArch]struct{})
237235
for _, ociArch := range config.Architectures {
238236
arch, err := libseccomp.GetArchFromString(ociArch)
239237
if err != nil {
240238
return nil, fmt.Errorf("unable to validate seccomp architecture: %w", err)
241239
}
240+
scmpArchs[arch] = struct{}{}
241+
}
242+
// On architectures like ppc64le, Docker inexplicably doesn't include the
243+
// native architecture in the architecture list which results in no
244+
// architectures being present in the list at all (rendering the ENOSYS
245+
// stub a no-op). So, always include the native architecture.
246+
if nativeScmpArch, err := libseccomp.GetNativeArch(); err != nil {
247+
return nil, fmt.Errorf("unable to get native arch: %w", err)
248+
} else if _, ok := scmpArchs[nativeScmpArch]; !ok {
249+
logrus.Debugf("seccomp: adding implied native architecture %v to config set", nativeScmpArch)
250+
scmpArchs[nativeScmpArch] = struct{}{}
251+
}
252+
logrus.Debugf("seccomp: configured architecture set: %s", scmpArchs)
242253

243-
// Figure out native architecture representation of the architecture.
254+
// Only loop over architectures which are present in the filter. Any other
255+
// architectures will get the libseccomp bad architecture action anyway.
256+
lastSyscalls := make(lastSyscallMap)
257+
for arch := range scmpArchs {
244258
auditArch, err := scmpArchToAuditArch(arch)
245259
if err != nil {
246260
return nil, fmt.Errorf("cannot map architecture %v to AUDIT_ARCH_ constant: %w", arch, err)

libcontainer/seccomp/patchbpf/enosys_linux_test.go

+38-4
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,16 @@ var testArches = []string{
105105
"ppc64le",
106106
"s390",
107107
"s390x",
108+
// Dummy value to indicate a configuration with no architecture specified.
109+
"native",
108110
}
109111

112+
// Used for the "native" architecture.
113+
var (
114+
scmpNativeArch, _ = libseccomp.GetNativeArch()
115+
nativeArch = scmpNativeArch.String()
116+
)
117+
110118
func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string) {
111119
explicitSyscalls := []string{
112120
"setns",
@@ -155,6 +163,9 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
155163
expected uint32
156164
}
157165

166+
if arch == "native" {
167+
arch = nativeArch
168+
}
158169
scmpArch, err := libseccomp.GetArchFromString(arch)
159170
if err != nil {
160171
t.Fatalf("unknown libseccomp architecture %q: %v", arch, err)
@@ -228,8 +239,15 @@ func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string)
228239

229240
// Test syscalls in the explicit list.
230241
for _, test := range syscallTests {
231-
// Override the expected value in the two special cases.
232-
if !archSet[arch] || isAllowAction(defaultAction) {
242+
// Override the expected value in the two special cases:
243+
// 1. If the default action is allow, the filter won't have
244+
// the stub prepended so we expect a fallthrough.
245+
// 2. If the executing architecture is not in the architecture
246+
// set, then the architecture is not handled by the stub --
247+
// *except* in the case of the native architecture (which
248+
// is always included in the stub).
249+
if isAllowAction(defaultAction) ||
250+
(!archSet[arch] && arch != nativeArch) {
233251
test.expected = retFallthrough
234252
}
235253

@@ -263,7 +281,14 @@ var testActions = map[string]configs.Action{
263281

264282
func TestEnosysStub_SingleArch(t *testing.T) {
265283
for _, arch := range testArches {
266-
arches := []string{arch}
284+
var arches []string
285+
// "native" indicates a blank architecture field for seccomp, to test
286+
// the case where the running architecture was not included in the
287+
// architecture. Docker doesn't always set the architecture for some
288+
// reason (namely for ppc64le).
289+
if arch != "native" {
290+
arches = append(arches, arch)
291+
}
267292
t.Run("arch="+arch, func(t *testing.T) {
268293
for name, action := range testActions {
269294
t.Run("action="+name, func(t *testing.T) {
@@ -277,7 +302,16 @@ func TestEnosysStub_SingleArch(t *testing.T) {
277302
func TestEnosysStub_MultiArch(t *testing.T) {
278303
for end := 0; end < len(testArches); end++ {
279304
for start := 0; start < end; start++ {
280-
arches := testArches[start:end]
305+
var arches []string
306+
for _, arch := range testArches[start:end] {
307+
// "native" indicates a blank architecture field for seccomp, to test
308+
// the case where the running architecture was not included in the
309+
// architecture. Docker doesn't always set the architecture for some
310+
// reason (namely for ppc64le).
311+
if arch != "native" {
312+
arches = append(arches, arch)
313+
}
314+
}
281315
if len(arches) <= 1 {
282316
continue
283317
}

0 commit comments

Comments
 (0)