Skip to content

Commit f4c34c7

Browse files
committed
fix: re-create cgroups when restarting runners
Make sure that restarted processes are only launched into freshly created cgroups that have all limits set. This also allows processes to restart after their cgroup has been killed via the cgroup.kill mechanism. Fixes #11785 Signed-off-by: Dmitrii Sharshakov <[email protected]>
1 parent 4d876d9 commit f4c34c7

File tree

3 files changed

+153
-140
lines changed

3 files changed

+153
-140
lines changed

internal/app/machined/pkg/startup/cgroups.go

Lines changed: 121 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -33,177 +33,158 @@ func zeroIfRace[T any](v T) T {
3333
return v
3434
}
3535

36-
// CreateSystemCgroups creates system cgroups.
37-
//
38-
//nolint:gocyclo
39-
func CreateSystemCgroups(ctx context.Context, log *zap.Logger, rt runtime.Runtime, next NextTaskFunc) error {
40-
// in container mode cgroups mode depends on cgroups provided by the container runtime
41-
if !rt.State().Platform().Mode().InContainer() {
42-
// assert that cgroupsv2 is being used when running not in container mode,
43-
// as Talos sets up cgroupsv2 on its own
44-
if cgroups.Mode() != cgroups.Unified {
45-
return errors.New("cgroupsv2 should be used")
46-
}
47-
}
48-
49-
// Initialize cgroups root path.
50-
if err := cgroup.InitRoot(); err != nil {
51-
return fmt.Errorf("error initializing cgroups root path: %w", err)
52-
}
53-
54-
log.Info("initializing cgroups", zap.String("root", cgroup.Root()))
55-
56-
groups := []struct {
57-
name string
58-
resources *cgroup2.Resources
59-
}{
60-
{
61-
name: constants.CgroupInit,
62-
resources: &cgroup2.Resources{
63-
Memory: &cgroup2.Memory{
64-
Min: pointer.To[int64](constants.CgroupInitReservedMemory),
65-
Low: pointer.To[int64](constants.CgroupInitReservedMemory * 2),
66-
},
67-
CPU: &cgroup2.CPU{
68-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupInitMillicores))),
69-
},
36+
// CreateCgroupV2 creates a cgroupv2 for the given name, applying the resources predefined for that name unless the name is unknown or inContainer is set.
37+
func CreateCgroupV2(name string, inContainer bool) (*cgroup2.Manager, error) {
38+
groups := map[string]*cgroup2.Resources{
39+
constants.CgroupInit: {
40+
Memory: &cgroup2.Memory{
41+
Min: pointer.To[int64](constants.CgroupInitReservedMemory),
42+
Low: pointer.To[int64](constants.CgroupInitReservedMemory * 2),
43+
},
44+
CPU: &cgroup2.CPU{
45+
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupInitMillicores))),
7046
},
7147
},
72-
{
73-
name: constants.CgroupSystem,
74-
resources: &cgroup2.Resources{
75-
Memory: &cgroup2.Memory{
76-
Min: pointer.To[int64](constants.CgroupSystemReservedMemory),
77-
Low: pointer.To[int64](constants.CgroupSystemReservedMemory * 2),
78-
},
79-
CPU: &cgroup2.CPU{
80-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupSystemMillicores))),
81-
},
48+
constants.CgroupSystem: {
49+
Memory: &cgroup2.Memory{
50+
Min: pointer.To[int64](constants.CgroupSystemReservedMemory),
51+
Low: pointer.To[int64](constants.CgroupSystemReservedMemory * 2),
52+
},
53+
CPU: &cgroup2.CPU{
54+
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupSystemMillicores))),
8255
},
8356
},
84-
{
85-
name: constants.CgroupSystemRuntime,
86-
resources: &cgroup2.Resources{
87-
Memory: &cgroup2.Memory{
88-
Min: pointer.To[int64](constants.CgroupSystemRuntimeReservedMemory),
89-
Low: pointer.To[int64](constants.CgroupSystemRuntimeReservedMemory * 2),
90-
},
91-
CPU: &cgroup2.CPU{
92-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupSystemRuntimeMillicores))),
93-
},
57+
constants.CgroupSystemRuntime: {
58+
Memory: &cgroup2.Memory{
59+
Min: pointer.To[int64](constants.CgroupSystemRuntimeReservedMemory),
60+
Low: pointer.To[int64](constants.CgroupSystemRuntimeReservedMemory * 2),
61+
},
62+
CPU: &cgroup2.CPU{
63+
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupSystemRuntimeMillicores))),
9464
},
9565
},
96-
{
97-
name: constants.CgroupUdevd,
98-
resources: &cgroup2.Resources{
99-
Memory: &cgroup2.Memory{
100-
Min: pointer.To[int64](constants.CgroupUdevdReservedMemory),
101-
Low: pointer.To[int64](constants.CgroupUdevdReservedMemory * 2),
102-
},
103-
CPU: &cgroup2.CPU{
104-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupUdevdMillicores))),
105-
},
66+
constants.CgroupUdevd: {
67+
Memory: &cgroup2.Memory{
68+
Min: pointer.To[int64](constants.CgroupUdevdReservedMemory),
69+
Low: pointer.To[int64](constants.CgroupUdevdReservedMemory * 2),
70+
},
71+
CPU: &cgroup2.CPU{
72+
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupUdevdMillicores))),
10673
},
10774
},
108-
{
109-
name: constants.CgroupPodRuntimeRoot,
110-
resources: &cgroup2.Resources{
111-
CPU: &cgroup2.CPU{
112-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupPodRuntimeRootMillicores))),
113-
},
75+
constants.CgroupPodRuntimeRoot: {
76+
CPU: &cgroup2.CPU{
77+
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupPodRuntimeRootMillicores))),
11478
},
11579
},
116-
{
117-
name: constants.CgroupPodRuntime,
118-
resources: &cgroup2.Resources{
119-
Memory: &cgroup2.Memory{
120-
Min: pointer.To[int64](constants.CgroupPodRuntimeReservedMemory),
121-
Low: pointer.To[int64](constants.CgroupPodRuntimeReservedMemory * 2),
122-
},
123-
CPU: &cgroup2.CPU{
124-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupPodRuntimeMillicores))),
125-
},
80+
constants.CgroupPodRuntime: {
81+
Memory: &cgroup2.Memory{
82+
Min: pointer.To[int64](constants.CgroupPodRuntimeReservedMemory),
83+
Low: pointer.To[int64](constants.CgroupPodRuntimeReservedMemory * 2),
84+
},
85+
CPU: &cgroup2.CPU{
86+
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupPodRuntimeMillicores))),
12687
},
12788
},
128-
{
129-
name: constants.CgroupKubelet,
130-
resources: &cgroup2.Resources{
131-
Memory: &cgroup2.Memory{
132-
Min: pointer.To[int64](constants.CgroupKubeletReservedMemory),
133-
Low: pointer.To[int64](constants.CgroupKubeletReservedMemory * 2),
134-
},
135-
CPU: &cgroup2.CPU{
136-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupKubeletMillicores))),
137-
},
89+
constants.CgroupKubelet: {
90+
Memory: &cgroup2.Memory{
91+
Min: pointer.To[int64](constants.CgroupKubeletReservedMemory),
92+
Low: pointer.To[int64](constants.CgroupKubeletReservedMemory * 2),
93+
},
94+
CPU: &cgroup2.CPU{
95+
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupKubeletMillicores))),
13896
},
13997
},
140-
{
141-
name: constants.CgroupDashboard,
142-
resources: &cgroup2.Resources{
143-
Memory: &cgroup2.Memory{
144-
Max: zeroIfRace(pointer.To[int64](constants.CgroupDashboardMaxMemory)),
145-
},
146-
CPU: &cgroup2.CPU{
147-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupDashboardMillicores))),
148-
},
98+
constants.CgroupDashboard: {
99+
Memory: &cgroup2.Memory{
100+
Max: zeroIfRace(pointer.To[int64](constants.CgroupDashboardMaxMemory)),
101+
},
102+
CPU: &cgroup2.CPU{
103+
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupDashboardMillicores))),
149104
},
150105
},
151-
{
152-
name: constants.CgroupApid,
153-
resources: &cgroup2.Resources{
154-
Memory: &cgroup2.Memory{
155-
Min: pointer.To[int64](constants.CgroupApidReservedMemory),
156-
Low: pointer.To[int64](constants.CgroupApidReservedMemory * 2),
157-
Max: zeroIfRace(pointer.To[int64](constants.CgroupApidMaxMemory)),
158-
Swap: pointer.To[int64](0),
159-
},
160-
CPU: &cgroup2.CPU{
161-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupApidMillicores))),
162-
},
106+
constants.CgroupApid: {
107+
Memory: &cgroup2.Memory{
108+
Min: pointer.To[int64](constants.CgroupApidReservedMemory),
109+
Low: pointer.To[int64](constants.CgroupApidReservedMemory * 2),
110+
Max: zeroIfRace(pointer.To[int64](constants.CgroupApidMaxMemory)),
111+
Swap: pointer.To[int64](0),
112+
},
113+
CPU: &cgroup2.CPU{
114+
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupApidMillicores))),
163115
},
164116
},
165-
{
166-
name: constants.CgroupTrustd,
167-
resources: &cgroup2.Resources{
168-
Memory: &cgroup2.Memory{
169-
Min: pointer.To[int64](constants.CgroupTrustdReservedMemory),
170-
Low: pointer.To[int64](constants.CgroupTrustdReservedMemory * 2),
171-
Max: zeroIfRace(pointer.To[int64](constants.CgroupTrustdMaxMemory)),
172-
Swap: pointer.To[int64](0),
173-
},
174-
CPU: &cgroup2.CPU{
175-
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupTrustdMillicores))),
176-
},
117+
constants.CgroupTrustd: {
118+
Memory: &cgroup2.Memory{
119+
Min: pointer.To[int64](constants.CgroupTrustdReservedMemory),
120+
Low: pointer.To[int64](constants.CgroupTrustdReservedMemory * 2),
121+
Max: zeroIfRace(pointer.To[int64](constants.CgroupTrustdMaxMemory)),
122+
Swap: pointer.To[int64](0),
123+
},
124+
CPU: &cgroup2.CPU{
125+
Weight: pointer.To[uint64](cgroup.MillicoresToCPUWeight(cgroup.MilliCores(constants.CgroupTrustdMillicores))),
177126
},
178127
},
179128
}
180129

181-
for _, c := range groups {
182-
if cgroups.Mode() == cgroups.Unified {
183-
resources := c.resources
130+
resources, ok := groups[name]
184131

185-
if rt.State().Platform().Mode().InContainer() {
186-
// don't attempt to set resources in container mode, as they might conflict with the parent cgroup tree
187-
resources = &cgroup2.Resources{}
188-
}
132+
if !ok || inContainer {
133+
// use empty resources for unknown cgroup names, and don't attempt to set resources in container mode, as they might conflict with the parent cgroup tree
134+
resources = &cgroup2.Resources{}
135+
}
189136

190-
cg, err := cgroup2.NewManager(constants.CgroupMountPath, cgroup.Path(c.name), resources)
191-
if err != nil {
192-
return fmt.Errorf("failed to create cgroup: %w", err)
193-
}
137+
cg, err := cgroup2.NewManager(constants.CgroupMountPath, cgroup.Path(name), resources)
138+
if err != nil {
139+
return nil, fmt.Errorf("failed to create cgroup: %w", err)
140+
}
194141

195-
if c.name == constants.CgroupInit {
196-
if err := cg.AddProc(uint64(os.Getpid())); err != nil {
197-
return fmt.Errorf("failed to move init process to cgroup: %w", err)
198-
}
142+
if name == constants.CgroupInit {
143+
if err := cg.AddProc(uint64(os.Getpid())); err != nil {
144+
return nil, fmt.Errorf("failed to move init process to cgroup: %w", err)
145+
}
146+
}
147+
148+
return cg, nil
149+
}
150+
151+
// CreateSystemCgroups creates system cgroups.
152+
func CreateSystemCgroups(ctx context.Context, log *zap.Logger, rt runtime.Runtime, next NextTaskFunc) error {
153+
// in container mode cgroups mode depends on cgroups provided by the container runtime
154+
if !rt.State().Platform().Mode().InContainer() {
155+
// assert that cgroupsv2 is being used when running not in container mode,
156+
// as Talos sets up cgroupsv2 on its own
157+
if cgroups.Mode() != cgroups.Unified {
158+
return errors.New("cgroupsv2 should be used")
159+
}
160+
}
161+
162+
// Initialize cgroups root path.
163+
if err := cgroup.InitRoot(); err != nil {
164+
return fmt.Errorf("error initializing cgroups root path: %w", err)
165+
}
166+
167+
log.Info("initializing cgroups", zap.String("root", cgroup.Root()))
168+
169+
groups := []string{
170+
constants.CgroupInit,
171+
constants.CgroupSystem,
172+
constants.CgroupPodRuntimeRoot,
173+
}
174+
175+
for _, c := range groups {
176+
if cgroups.Mode() == cgroups.Unified {
177+
_, err := CreateCgroupV2(c, rt.State().Platform().Mode().InContainer())
178+
if err != nil {
179+
return err
199180
}
200181
} else {
201-
cg, err := cgroup1.New(cgroup1.StaticPath(c.name), &specs.LinuxResources{})
182+
cg, err := cgroup1.New(cgroup1.StaticPath(c), &specs.LinuxResources{})
202183
if err != nil {
203184
return fmt.Errorf("failed to create cgroup: %w", err)
204185
}
205186

206-
if c.name == constants.CgroupInit {
187+
if c == constants.CgroupInit {
207188
if err := cg.Add(cgroup1.Process{
208189
Pid: os.Getpid(),
209190
}); err != nil {

internal/app/machined/pkg/system/runner/containerd/containerd.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"fmt"
1111
"io"
1212
"log"
13+
"strings"
1314
"syscall"
1415
"time"
1516

@@ -20,6 +21,8 @@ import (
2021
"github.com/containerd/containerd/v2/pkg/oci"
2122
"github.com/containerd/errdefs"
2223

24+
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
25+
"github.com/siderolabs/talos/internal/app/machined/pkg/startup"
2326
"github.com/siderolabs/talos/internal/app/machined/pkg/system/events"
2427
"github.com/siderolabs/talos/internal/app/machined/pkg/system/runner"
2528
"github.com/siderolabs/talos/internal/pkg/cgroup"
@@ -168,6 +171,21 @@ func (c *containerdRunner) Run(eventSink events.Recorder) error {
168171
return fmt.Errorf("error creating log: %w", err)
169172
}
170173

174+
cg, err := startup.CreateCgroupV2(c.opts.CgroupPath, runtime.ModeContainer.InContainer())
175+
if err != nil {
176+
return fmt.Errorf("error creating cgroup: %w", err)
177+
}
178+
179+
// If the task is not cleaned up by containerd or another error
180+
// happens during the lifecycle, remove the cgroup before exiting
181+
// if one still exists
182+
defer func() {
183+
err := cg.Delete()
184+
if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
185+
eventSink(events.StateStopping, "Failed to remove cgroup for %s, %s", c, err)
186+
}
187+
}()
188+
171189
defer logW.Close() //nolint:errcheck
172190

173191
var w io.Writer = logW

internal/app/machined/pkg/system/runner/process/process.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727

2828
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
2929
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform"
30+
"github.com/siderolabs/talos/internal/app/machined/pkg/startup"
3031
"github.com/siderolabs/talos/internal/app/machined/pkg/system/events"
3132
"github.com/siderolabs/talos/internal/app/machined/pkg/system/runner"
3233
"github.com/siderolabs/talos/internal/pkg/cgroup"
@@ -413,7 +414,20 @@ func setSchedulingPolicy(p *processRunner, pid int, schedulingPolicy uint) error
413414
return nil
414415
}
415416

417+
//nolint:gocyclo
416418
func (p *processRunner) run(eventSink events.Recorder) error {
419+
cg, err := startup.CreateCgroupV2(p.opts.CgroupPath, runtime.ModeContainer.InContainer())
420+
if err != nil {
421+
return fmt.Errorf("error creating cgroup: %w", err)
422+
}
423+
424+
defer func() {
425+
err := cg.Delete()
426+
if err != nil {
427+
eventSink(events.StateStopping, "Failed to remove cgroup for %s, %s", p, err)
428+
}
429+
}()
430+
417431
cmdWrapper, err := p.build()
418432
if err != nil {
419433
return fmt.Errorf("error building command: %w", err)

0 commit comments

Comments
 (0)