Skip to content

Commit 69ab076

Browse files
committed
fix: re-create cgroups when restarting runners
Make sure processes are only launched into freshly created cgroups with all limits set when they are restarted. This also allows processes to restart after their cgroup has been killed via the cgroup.kill mechanism. Fixes #11785 Signed-off-by: Dmitrii Sharshakov <[email protected]>
1 parent 297b5cc commit 69ab076

File tree

12 files changed

+254
-193
lines changed

12 files changed

+254
-193
lines changed

internal/app/machined/pkg/runtime/v1alpha1/platform/platform.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import (
3434
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/upcloud"
3535
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/vmware"
3636
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/vultr"
37+
"github.com/siderolabs/talos/internal/pkg/containermode"
3738
"github.com/siderolabs/talos/pkg/machinery/constants"
3839
)
3940

@@ -67,7 +68,7 @@ const (
6768

6869
// CurrentPlatform is a helper func for discovering the current platform.
6970
func CurrentPlatform() (p runtime.Platform, err error) {
70-
if _, err := os.Stat("/usr/etc/in-container"); err == nil {
71+
if containermode.InContainer() {
7172
return newPlatform("container")
7273
}
7374

internal/app/machined/pkg/startup/cgroups.go

Lines changed: 7 additions & 172 deletions
Original file line numberDiff line numberDiff line change
@@ -8,34 +8,16 @@ import (
88
"context"
99
"errors"
1010
"fmt"
11-
"os"
1211

1312
"github.com/containerd/cgroups/v3"
14-
"github.com/containerd/cgroups/v3/cgroup1"
15-
"github.com/containerd/cgroups/v3/cgroup2"
16-
"github.com/opencontainers/runtime-spec/specs-go"
17-
"github.com/siderolabs/go-debug"
18-
"github.com/siderolabs/go-pointer"
1913
"go.uber.org/zap"
2014

2115
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
2216
"github.com/siderolabs/talos/internal/pkg/cgroup"
2317
"github.com/siderolabs/talos/pkg/machinery/constants"
2418
)
2519

26-
func zeroIfRace[T any](v T) T {
27-
if debug.RaceEnabled {
28-
var zeroT T
29-
30-
return zeroT
31-
}
32-
33-
return v
34-
}
35-
3620
// CreateSystemCgroups creates system cgroups.
37-
//
38-
//nolint:gocyclo
3921
func CreateSystemCgroups(ctx context.Context, log *zap.Logger, rt runtime.Runtime, next NextTaskFunc) error {
4022
// in container mode cgroups mode depends on cgroups provided by the container runtime
4123
if !rt.State().Platform().Mode().InContainer() {
@@ -53,163 +35,16 @@ func CreateSystemCgroups(ctx context.Context, log *zap.Logger, rt runtime.Runtim
5335

5436
log.Info("initializing cgroups", zap.String("root", cgroup.Root()))
5537

56-
groups := []struct {
57-
name string
58-
resources *cgroup2.Resources
59-
}{
60-
{
61-
name: constants.CgroupInit,
62-
resources: &cgroup2.Resources{
63-
Memory: &cgroup2.Memory{
64-
Min: pointer.To[int64](constants.CgroupInitReservedMemory),
65-
Low: pointer.To[int64](constants.CgroupInitReservedMemory * 2),
66-
},
67-
CPU: &cgroup2.CPU{
68-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupInitMillicores))),
69-
},
70-
},
71-
},
72-
{
73-
name: constants.CgroupSystem,
74-
resources: &cgroup2.Resources{
75-
Memory: &cgroup2.Memory{
76-
Min: pointer.To[int64](constants.CgroupSystemReservedMemory),
77-
Low: pointer.To[int64](constants.CgroupSystemReservedMemory * 2),
78-
},
79-
CPU: &cgroup2.CPU{
80-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupSystemMillicores))),
81-
},
82-
},
83-
},
84-
{
85-
name: constants.CgroupSystemRuntime,
86-
resources: &cgroup2.Resources{
87-
Memory: &cgroup2.Memory{
88-
Min: pointer.To[int64](constants.CgroupSystemRuntimeReservedMemory),
89-
Low: pointer.To[int64](constants.CgroupSystemRuntimeReservedMemory * 2),
90-
},
91-
CPU: &cgroup2.CPU{
92-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupSystemRuntimeMillicores))),
93-
},
94-
},
95-
},
96-
{
97-
name: constants.CgroupUdevd,
98-
resources: &cgroup2.Resources{
99-
Memory: &cgroup2.Memory{
100-
Min: pointer.To[int64](constants.CgroupUdevdReservedMemory),
101-
Low: pointer.To[int64](constants.CgroupUdevdReservedMemory * 2),
102-
},
103-
CPU: &cgroup2.CPU{
104-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupUdevdMillicores))),
105-
},
106-
},
107-
},
108-
{
109-
name: constants.CgroupPodRuntimeRoot,
110-
resources: &cgroup2.Resources{
111-
CPU: &cgroup2.CPU{
112-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupPodRuntimeRootMillicores))),
113-
},
114-
},
115-
},
116-
{
117-
name: constants.CgroupPodRuntime,
118-
resources: &cgroup2.Resources{
119-
Memory: &cgroup2.Memory{
120-
Min: pointer.To[int64](constants.CgroupPodRuntimeReservedMemory),
121-
Low: pointer.To[int64](constants.CgroupPodRuntimeReservedMemory * 2),
122-
},
123-
CPU: &cgroup2.CPU{
124-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupPodRuntimeMillicores))),
125-
},
126-
},
127-
},
128-
{
129-
name: constants.CgroupKubelet,
130-
resources: &cgroup2.Resources{
131-
Memory: &cgroup2.Memory{
132-
Min: pointer.To[int64](constants.CgroupKubeletReservedMemory),
133-
Low: pointer.To[int64](constants.CgroupKubeletReservedMemory * 2),
134-
},
135-
CPU: &cgroup2.CPU{
136-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupKubeletMillicores))),
137-
},
138-
},
139-
},
140-
{
141-
name: constants.CgroupDashboard,
142-
resources: &cgroup2.Resources{
143-
Memory: &cgroup2.Memory{
144-
Max: zeroIfRace(pointer.To[int64](constants.CgroupDashboardMaxMemory)),
145-
},
146-
CPU: &cgroup2.CPU{
147-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupDashboardMillicores))),
148-
},
149-
},
150-
},
151-
{
152-
name: constants.CgroupApid,
153-
resources: &cgroup2.Resources{
154-
Memory: &cgroup2.Memory{
155-
Min: pointer.To[int64](constants.CgroupApidReservedMemory),
156-
Low: pointer.To[int64](constants.CgroupApidReservedMemory * 2),
157-
Max: zeroIfRace(pointer.To[int64](constants.CgroupApidMaxMemory)),
158-
Swap: pointer.To[int64](0),
159-
},
160-
CPU: &cgroup2.CPU{
161-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupApidMillicores))),
162-
},
163-
},
164-
},
165-
{
166-
name: constants.CgroupTrustd,
167-
resources: &cgroup2.Resources{
168-
Memory: &cgroup2.Memory{
169-
Min: pointer.To[int64](constants.CgroupTrustdReservedMemory),
170-
Low: pointer.To[int64](constants.CgroupTrustdReservedMemory * 2),
171-
Max: zeroIfRace(pointer.To[int64](constants.CgroupTrustdMaxMemory)),
172-
Swap: pointer.To[int64](0),
173-
},
174-
CPU: &cgroup2.CPU{
175-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupTrustdMillicores))),
176-
},
177-
},
178-
},
38+
groups := []string{
39+
constants.CgroupInit,
40+
constants.CgroupSystem,
41+
constants.CgroupPodRuntimeRoot,
17942
}
18043

18144
for _, c := range groups {
182-
if cgroups.Mode() == cgroups.Unified {
183-
resources := c.resources
184-
185-
if rt.State().Platform().Mode().InContainer() {
186-
// don't attempt to set resources in container mode, as they might conflict with the parent cgroup tree
187-
resources = &cgroup2.Resources{}
188-
}
189-
190-
cg, err := cgroup2.NewManager(constants.CgroupMountPath, cgroup.Path(c.name), resources)
191-
if err != nil {
192-
return fmt.Errorf("failed to create cgroup: %w", err)
193-
}
194-
195-
if c.name == constants.CgroupInit {
196-
if err := cg.AddProc(uint64(os.Getpid())); err != nil {
197-
return fmt.Errorf("failed to move init process to cgroup: %w", err)
198-
}
199-
}
200-
} else {
201-
cg, err := cgroup1.New(cgroup1.StaticPath(c.name), &specs.LinuxResources{})
202-
if err != nil {
203-
return fmt.Errorf("failed to create cgroup: %w", err)
204-
}
205-
206-
if c.name == constants.CgroupInit {
207-
if err := cg.Add(cgroup1.Process{
208-
Pid: os.Getpid(),
209-
}); err != nil {
210-
return fmt.Errorf("failed to move init process to cgroup: %w", err)
211-
}
212-
}
45+
_, err := cgroup.CreateCgroup(c)
46+
if err != nil {
47+
return err
21348
}
21449
}
21550

internal/app/machined/pkg/system/runner/containerd/containerd.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"fmt"
1111
"io"
1212
"log"
13+
"os"
1314
"syscall"
1415
"time"
1516

@@ -168,6 +169,21 @@ func (c *containerdRunner) Run(eventSink events.Recorder) error {
168169
return fmt.Errorf("error creating log: %w", err)
169170
}
170171

172+
cg, err := cgroup.CreateCgroup(c.opts.CgroupPath)
173+
if err != nil {
174+
return fmt.Errorf("error creating cgroup: %w", err)
175+
}
176+
177+
// If the task is not cleaned up by containerd or another error
178+
// happens during the lifecycle, remove the cgroup before exiting
179+
// if one still exists
180+
defer func() {
181+
err := cg.Delete()
182+
if err != nil && !os.IsNotExist(err) {
183+
eventSink(events.StateStopping, "Failed to remove cgroup for %s, %s", c, err)
184+
}
185+
}()
186+
171187
defer logW.Close() //nolint:errcheck
172188

173189
var w io.Writer = logW

internal/app/machined/pkg/system/runner/process/process.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,20 @@ func setSchedulingPolicy(p *processRunner, pid int, schedulingPolicy uint) error
413413
return nil
414414
}
415415

416+
//nolint:gocyclo
416417
func (p *processRunner) run(eventSink events.Recorder) error {
418+
cg, err := cgroup.CreateCgroup(p.opts.CgroupPath)
419+
if err != nil {
420+
return fmt.Errorf("error creating cgroup: %w", err)
421+
}
422+
423+
defer func() {
424+
err := cg.Delete()
425+
if err != nil {
426+
eventSink(events.StateStopping, "Failed to remove cgroup for %s, %s", p, err)
427+
}
428+
}()
429+
417430
cmdWrapper, err := p.build()
418431
if err != nil {
419432
return fmt.Errorf("error building command: %w", err)

internal/app/machined/pkg/system/services/apid.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -177,12 +177,7 @@ func (o *APID) Runner(r runtime.Runtime) (runner.Runner, error) {
177177
{Type: "bind", Destination: filepath.Dir(constants.APISocketPath), Source: filepath.Dir(constants.APISocketPath), Options: []string{"rbind", "rw"}},
178178
}
179179

180-
if _, err := os.Stat("/usr/etc/in-container"); err == nil {
181-
mounts = append(
182-
mounts,
183-
specs.Mount{Type: "bind", Destination: "/usr/etc/in-container", Source: "/usr/etc/in-container", Options: []string{"bind", "ro"}},
184-
)
185-
}
180+
mounts = bindMountContainerMarker(mounts)
186181

187182
env := []string{
188183
constants.TcellMinimizeEnvironment,

internal/app/machined/pkg/system/services/extension.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -176,12 +176,7 @@ func (svc *Extension) Runner(r runtime.Runtime) (runner.Runner, error) {
176176

177177
mounts := append([]specs.Mount{}, svc.Spec.Container.Mounts...)
178178

179-
if _, err := os.Stat("/usr/etc/in-container"); err == nil {
180-
mounts = append(
181-
mounts,
182-
specs.Mount{Type: "bind", Destination: "/usr/etc/in-container", Source: "/usr/etc/in-container", Options: []string{"bind", "ro"}},
183-
)
184-
}
179+
mounts = bindMountContainerMarker(mounts)
185180

186181
envVars, err := svc.parseEnvironment()
187182
if err != nil {

internal/app/machined/pkg/system/services/trustd.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,7 @@ func (t *Trustd) Runner(r runtime.Runtime) (runner.Runner, error) {
154154
{Type: "bind", Destination: filepath.Dir(constants.TrustdRuntimeSocketPath), Source: filepath.Dir(constants.TrustdRuntimeSocketPath), Options: []string{"rbind", "ro"}},
155155
}
156156

157-
if _, err := os.Stat("/usr/etc/in-container"); err == nil {
158-
mounts = append(
159-
mounts,
160-
specs.Mount{Type: "bind", Destination: "/usr/etc/in-container", Source: "/usr/etc/in-container", Options: []string{"bind", "ro"}},
161-
)
162-
}
157+
mounts = bindMountContainerMarker(mounts)
163158

164159
env := environment.Get(r.Config())
165160
env = append(env,

internal/app/machined/pkg/system/services/utils.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@ import (
99
"os"
1010
"path/filepath"
1111

12+
specs "github.com/opencontainers/runtime-spec/specs-go"
1213
"golang.org/x/sys/unix"
1314

15+
"github.com/siderolabs/talos/internal/pkg/containermode"
1416
"github.com/siderolabs/talos/pkg/machinery/constants"
1517
)
1618

@@ -34,3 +36,15 @@ func prepareRootfs(id string) error {
3436

3537
return nil
3638
}
39+
40+
// bindMountContainerMarker bind-mounts a file used for container detection into a container service.
41+
func bindMountContainerMarker(mounts []specs.Mount) []specs.Mount {
42+
if containermode.InContainer() {
43+
mounts = append(
44+
mounts,
45+
specs.Mount{Type: "bind", Destination: constants.ContainerMarkerFilePath, Source: constants.ContainerMarkerFilePath, Options: []string{"bind", "ro"}},
46+
)
47+
}
48+
49+
return mounts
50+
}

0 commit comments

Comments
 (0)