
Commit 2fc11fd

Author: Amarthya Valija
Add NodePool STS permissions test
1 parent d3ba311 commit 2fc11fd

File tree

1 file changed: +232 -0 lines changed

pkg/e2e/verify/nodepool.go

Lines changed: 232 additions & 0 deletions
@@ -0,0 +1,232 @@
package verify

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"strings"
	"time"

	"github.com/onsi/ginkgo/v2"

	"github.com/openshift/osde2e/pkg/common/aws"
	"github.com/openshift/osde2e/pkg/common/expect"
	"github.com/openshift/osde2e/pkg/common/helper"
	"github.com/openshift/osde2e/pkg/common/label"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/e2e-framework/klient/k8s/resources"
	"sigs.k8s.io/e2e-framework/klient/wait"
)

// NodePool STS Permissions Test
//
// This test validates day 2 operations for ROSA HCP clusters by creating NodePools.
// It ensures the required STS permissions (ec2:RunInstances, ec2:CreateTags,
// ec2:DescribeInstances) are in place.

var nodePoolTestName = "[Suite: e2e] NodePool STS Permissions"

var _ = ginkgo.Describe(nodePoolTestName, ginkgo.Ordered, label.E2E, func() {
	var h *helper.H
	var client *resources.Resources

	ginkgo.BeforeAll(func() {
		h = helper.New()
		client = h.AsUser("")
	})

	ginkgo.It("should create nodepool and provision EC2 instances with proper STS permissions", func(ctx context.Context) {
		var nodes corev1.NodeList
		expect.NoError(client.List(ctx, &nodes))

		if len(nodes.Items) == 0 {
			ginkgo.Skip("No nodes found")
		}

		clusterName := extractClusterName(nodes.Items)
		clusterID := getClusterID(nodes.Items)

		if clusterName == "" || clusterID == "" {
			ginkgo.Skip("Could not determine cluster information")
		}

		nodePoolName := fmt.Sprintf("osde2e-sts-test-%d", time.Now().Unix())

		ginkgo.By("Creating NodePool to test STS permissions for day 2 operations")
		err := createNodePool(clusterID, nodePoolName)
		if err != nil {
			expect.NoError(fmt.Errorf("NodePool creation failed - STS permissions may be missing: %v", err))
		}

		// Cleanup
		defer deleteNodePool(clusterID, nodePoolName)

		ginkgo.By("Waiting for new nodes from NodePool (tests ec2:RunInstances, ec2:CreateTags)")
		newNodes := waitForNewNodes(ctx, client, len(nodes.Items), nodePoolName)

		if len(newNodes) == 0 {
			expect.NoError(fmt.Errorf("NodePool failed to provision nodes - STS permissions (ec2:RunInstances, ec2:CreateTags) missing"))
		}

		ginkgo.By("Validating nodes exist in AWS (tests ec2:DescribeInstances)")
		for _, node := range newNodes {
			exists, err := aws.CcsAwsSession.CheckIfEC2ExistBasedOnNodeName(node.Name)
			if err != nil {
				expect.NoError(fmt.Errorf("AWS validation failed for node %s - STS permissions (ec2:DescribeInstances) missing: %v", node.Name, err))
			}
			if !exists {
				expect.NoError(fmt.Errorf("node %s not found in AWS - STS permissions (ec2:DescribeInstances) missing", node.Name))
			}
		}

		ginkgo.By("Testing workload scheduling on new NodePool")
		err = testWorkload(ctx, client, h, nodePoolName)
		if err != nil {
			expect.NoError(fmt.Errorf("workload scheduling failed on NodePool: %v", err))
		}

		ginkgo.By("NodePool operations completed successfully - STS permissions working")
		ginkgo.GinkgoLogr.Info("NodePool STS permissions test PASSED",
			"cluster", clusterName,
			"nodepool", nodePoolName,
			"nodes_created", len(newNodes))
	})
})

// extractClusterName derives the cluster name from the HyperShift nodePool
// label, which is expected to have the form "<cluster-name>-workers-<suffix>".
func extractClusterName(nodes []corev1.Node) string {
	for _, node := range nodes {
		if label, exists := node.Labels["hypershift.openshift.io/nodePool"]; exists {
			if strings.Contains(label, "-workers-") {
				return strings.Split(label, "-workers-")[0]
			}
		}
	}
	return ""
}

// getClusterID returns the cluster ID from the CLUSTER_ID environment
// variable, falling back to the leading segment of a node name.
func getClusterID(nodes []corev1.Node) string {
	if clusterID := os.Getenv("CLUSTER_ID"); clusterID != "" {
		return clusterID
	}

	for _, node := range nodes {
		if strings.Contains(node.Name, "-") {
			parts := strings.Split(node.Name, "-")
			if len(parts) > 0 && len(parts[0]) > 10 {
				return parts[0]
			}
		}
	}
	return ""
}

// createNodePool creates a one-replica machine pool on the cluster via the
// rosa CLI, logging in first if no session is active.
func createNodePool(clusterID, nodePoolName string) error {
	if _, err := exec.Command("rosa", "whoami").CombinedOutput(); err != nil {
		if token := os.Getenv("OCM_TOKEN"); token != "" {
			_ = exec.Command("rosa", "login", "--env", "stage", "--token", token).Run()
		}
	}

	cmd := exec.Command("rosa", "create", "machinepool",
		"--cluster", clusterID,
		"--name", nodePoolName,
		"--replicas", "1",
		"--instance-type", "m5.large",
		"--yes")

	_, err := cmd.CombinedOutput()
	return err
}

func deleteNodePool(clusterID, nodePoolName string) {
	_ = exec.Command("rosa", "delete", "machinepool", nodePoolName, "--cluster", clusterID, "--yes").Run()
}

func isWorkerNode(node corev1.Node) bool {
	_, isMaster := node.Labels["node-role.kubernetes.io/master"]
	_, isControlPlane := node.Labels["node-role.kubernetes.io/control-plane"]
	return !isMaster && !isControlPlane
}

// waitForNewNodes polls until at least one Ready node labeled with the new
// NodePool name appears, or the timeout expires.
func waitForNewNodes(ctx context.Context, client *resources.Resources, initialCount int, nodePoolName string) []corev1.Node {
	var newNodes []corev1.Node

	_ = wait.For(func(ctx context.Context) (bool, error) {
		var currentNodes corev1.NodeList
		if err := client.List(ctx, &currentNodes); err != nil {
			return false, err
		}

		newNodes = nil
		if len(currentNodes.Items) > initialCount {
			for _, node := range currentNodes.Items {
				if label, exists := node.Labels["hypershift.openshift.io/nodePool"]; exists && label == nodePoolName {
					if isNodeReady(node) {
						newNodes = append(newNodes, node)
					}
				}
			}
		}
		return len(newNodes) > 0, nil
	}, wait.WithTimeout(20*time.Minute), wait.WithInterval(30*time.Second))

	return newNodes
}

func isNodeReady(node corev1.Node) bool {
	for _, condition := range node.Status.Conditions {
		if condition.Type == corev1.NodeReady && condition.Status == corev1.ConditionTrue {
			return true
		}
	}
	return false
}

// testWorkload schedules a short-lived pod onto the new NodePool via a
// nodeSelector and waits for it to complete successfully.
func testWorkload(ctx context.Context, client *resources.Resources, h *helper.H, nodePoolName string) error {
	pod := &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: "nodepool-test-",
			Namespace:    h.CurrentProject(),
		},
		Spec: corev1.PodSpec{
			NodeSelector: map[string]string{
				"hypershift.openshift.io/nodePool": nodePoolName,
			},
			Containers: []corev1.Container{{
				Name:    "test",
				Image:   "registry.access.redhat.com/ubi8/ubi-minimal",
				Command: []string{"/bin/sh", "-c", "echo 'Success' && sleep 10"},
				Resources: corev1.ResourceRequirements{
					Requests: corev1.ResourceList{
						corev1.ResourceCPU:    resource.MustParse("100m"),
						corev1.ResourceMemory: resource.MustParse("128Mi"),
					},
				},
			}},
			RestartPolicy: corev1.RestartPolicyNever,
		},
	}

	if err := client.Create(ctx, pod); err != nil {
		return fmt.Errorf("failed to create test pod: %v", err)
	}
	defer client.Delete(ctx, pod)

	// Wait for pod success
	err := wait.For(func(ctx context.Context) (bool, error) {
		p := &corev1.Pod{}
		if err := client.Get(ctx, pod.GetName(), pod.GetNamespace(), p); err != nil {
			return false, err
		}
		return p.Status.Phase == corev1.PodSucceeded, nil
	}, wait.WithTimeout(3*time.Minute))

	if err != nil {
		return fmt.Errorf("test pod failed to complete successfully: %v", err)
	}

	return nil
}
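For context, the helpers above key off the hypershift.openshift.io/nodePool node label that HyperShift sets on worker nodes. Below is a minimal, self-contained sketch of the string parsing that extractClusterName performs, using a hypothetical label value; the "<cluster-name>-workers-<suffix>" shape is an assumption the test relies on, not something confirmed by this commit.

	package main

	import (
		"fmt"
		"strings"
	)

	func main() {
		// Hypothetical label value for illustration only; the test assumes default
		// worker pools are labeled "<cluster-name>-workers-<suffix>".
		label := "my-hcp-cluster-workers-us-east-1a"

		if strings.Contains(label, "-workers-") {
			// Same split extractClusterName uses to recover the cluster name.
			fmt.Println(strings.Split(label, "-workers-")[0]) // prints "my-hcp-cluster"
		}
	}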
