
Commit 37a587c

Author: Amarthya Valija (committed)
Add NodePool STS permissions test
1 parent d3ba311 commit 37a587c

1 file changed: +324 -0 lines


pkg/e2e/verify/nodepool.go

Lines changed: 324 additions & 0 deletions
@@ -0,0 +1,324 @@
package verify

import (
	"context"
	"fmt"
	"strings"
	"time"

	"github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"

	viper "github.com/openshift/osde2e/pkg/common/concurrentviper"
	"github.com/openshift/osde2e/pkg/common/config"
	"github.com/openshift/osde2e/pkg/common/expect"
	"github.com/openshift/osde2e/pkg/common/helper"
	"github.com/openshift/osde2e/pkg/common/label"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"sigs.k8s.io/e2e-framework/klient/k8s/resources"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
)

const (
	nodePoolLabel      = "hypershift.openshift.io/nodePool"
	hostedClusterLabel = "hypershift.openshift.io/hosted-cluster"
)

var _ = ginkgo.Describe("[Suite: e2e] NodePool STS Permissions", ginkgo.Ordered, label.HyperShift, label.E2E, func() {
	var h *helper.H
	var client *resources.Resources
	var clusterNamespace string
	var testNodePoolName string

	nodePoolGVR := schema.GroupVersionResource{
		Group:    "hypershift.openshift.io",
		Version:  "v1beta1",
		Resource: "nodepools",
	}

	ginkgo.BeforeAll(func() {
		if !viper.GetBool(config.Hypershift) {
			ginkgo.Skip("NodePool tests are only supported on HyperShift clusters")
		}

		if viper.GetString(config.CloudProvider.CloudProviderID) != "aws" {
			ginkgo.Skip("NodePool STS tests are only supported on AWS HyperShift clusters")
		}

		h = helper.New()
		client = h.AsUser("")

		clusterNamespace = getClusterNamespace(h)
		if clusterNamespace == "" {
			ginkgo.Fail("Could not determine cluster namespace - cluster may not be properly configured")
		}

		testNodePoolName = fmt.Sprintf("test-%d", time.Now().Unix()%100000)
	})

	ginkgo.AfterAll(func() {
		ctx := context.Background()
		ginkgo.By("Cleaning up test NodePool")

		err := h.Dynamic().Resource(nodePoolGVR).Namespace(clusterNamespace).
			Delete(ctx, testNodePoolName, metav1.DeleteOptions{})
		if err != nil && !apierrors.IsNotFound(err) {
			ginkgo.GinkgoLogr.Error(err, "Failed to clean up test NodePool", "name", testNodePoolName)
		}

		// Best effort: wait for the NodePool to disappear before the suite exits.
		_ = wait.For(func(ctx context.Context) (bool, error) {
			_, err := h.Dynamic().Resource(nodePoolGVR).Namespace(clusterNamespace).
				Get(ctx, testNodePoolName, metav1.GetOptions{})
			return apierrors.IsNotFound(err), nil
		}, wait.WithTimeout(3*time.Minute), wait.WithInterval(5*time.Second))
	})

ginkgo.It("should create NodePool with proper STS permissions", func(ctx context.Context) {
82+
ginkgo.By("Creating test NodePool to validate STS permissions")
83+
84+
subnet := getExistingSubnet(ctx, h, clusterNamespace)
85+
nodePoolSpec := buildNodePoolSpec(testNodePoolName, clusterNamespace, subnet)
86+
nodePoolObj := &unstructured.Unstructured{Object: nodePoolSpec}
87+
88+
_, err := h.Dynamic().Resource(nodePoolGVR).Namespace(clusterNamespace).
89+
Create(ctx, nodePoolObj, metav1.CreateOptions{})
90+
91+
if detectSTSPermissionError(err) {
92+
expect.NoError(fmt.Errorf(
93+
"STS_PERMISSION_ERROR: NodePool creation failed due to missing AWS permissions (ec2:RunInstances, ec2:CreateTags): %v. "+
94+
"This blocks release until STS policies are updated", err))
95+
}
96+
expect.NoError(err, "NodePool creation failed")
97+
98+
ginkgo.GinkgoLogr.Info("NodePool created successfully - STS permissions validated",
99+
"nodepool", testNodePoolName, "permissions", []string{"ec2:RunInstances", "ec2:CreateTags"})
100+
})
101+
102+
ginkgo.It("should provision nodes with AWS integration", func(ctx context.Context) {
103+
ginkgo.By("Waiting for NodePool to become Ready")
104+
105+
err := wait.For(func(ctx context.Context) (bool, error) {
106+
np, err := h.Dynamic().Resource(nodePoolGVR).Namespace(clusterNamespace).
107+
Get(ctx, testNodePoolName, metav1.GetOptions{})
108+
if err != nil { return false, err }
109+
ready, _, _ := unstructured.NestedInt64(np.Object, "status", "readyReplicas")
110+
return ready >= 1, nil
111+
}, wait.WithTimeout(10*time.Minute), wait.WithInterval(15*time.Second))
112+
expect.NoError(err, "NodePool did not report ready replicas")
113+
114+
ginkgo.By("Waiting for nodes to be provisioned")
115+
116+
var testNode *corev1.Node
117+
err = wait.For(func(ctx context.Context) (bool, error) {
118+
var nodeList corev1.NodeList
119+
err := client.List(ctx, &nodeList)
120+
if err != nil {
121+
return false, err
122+
}
123+
124+
for _, node := range nodeList.Items {
125+
if labelValue, exists := node.Labels[nodePoolLabel]; exists {
126+
if strings.Contains(labelValue, testNodePoolName) && isNodeReady(node) {
127+
testNode = &node
128+
return true, nil
129+
}
130+
}
131+
}
132+
return false, nil
133+
}, wait.WithTimeout(15*time.Minute), wait.WithInterval(30*time.Second))
134+
135+
if detectSTSPermissionError(err) {
136+
expect.NoError(fmt.Errorf(
137+
"STS_PERMISSION_ERROR: Node provisioning failed due to missing AWS permissions (ec2:RunInstances): %v. "+
138+
"This blocks release until STS policies are updated", err))
139+
}
140+
expect.NoError(err, "NodePool failed to provision nodes")
141+
142+
ginkgo.By("Validating AWS integration")
143+
Expect(testNode).ToNot(BeNil(), "Test node should be available")
144+
145+
// Validate AWS provider ID
146+
Expect(testNode.Spec.ProviderID).To(HavePrefix("aws://"),
147+
"Node should have AWS provider ID - ec2:DescribeInstances permission may be missing")
148+
149+
hasInternalIP := false
150+
for _, addr := range testNode.Status.Addresses {
151+
if addr.Type == corev1.NodeInternalIP {
152+
hasInternalIP = true
153+
break
154+
}
155+
}
156+
Expect(hasInternalIP).To(BeTrue(), "Node should have InternalIP")
157+
158+
159+
ginkgo.GinkgoLogr.Info("Node provisioning validated - STS permissions working",
160+
"node", testNode.Name, "permissions", []string{"ec2:RunInstances", "ec2:DescribeInstances"})
161+
})
162+
163+
ginkgo.It("should schedule workloads on NodePool nodes", func(ctx context.Context) {
164+
ginkgo.By("Creating test workload targeted at NodePool")
165+
166+
pod := &corev1.Pod{
167+
ObjectMeta: metav1.ObjectMeta{
168+
GenerateName: "nodepool-test-",
169+
Namespace: h.CurrentProject(),
170+
},
171+
Spec: corev1.PodSpec{
172+
NodeSelector: map[string]string{
173+
nodePoolLabel: fmt.Sprintf("%s-%s", clusterNamespace, testNodePoolName),
174+
},
175+
Containers: []corev1.Container{{
176+
Name: "test",
177+
Image: "registry.access.redhat.com/ubi8/ubi-minimal",
178+
Command: []string{"/bin/sh", "-c", "echo 'NodePool test successful' && sleep 5"},
179+
Resources: corev1.ResourceRequirements{
180+
Requests: corev1.ResourceList{
181+
corev1.ResourceCPU: resource.MustParse("100m"),
182+
corev1.ResourceMemory: resource.MustParse("128Mi"),
183+
},
184+
},
185+
}},
186+
RestartPolicy: corev1.RestartPolicyNever,
187+
},
188+
}
189+
190+
err := client.Create(ctx, pod)
191+
expect.NoError(err, "Failed to create test pod")
192+
defer client.Delete(ctx, pod)
193+
194+
ginkgo.By("Waiting for workload to complete successfully")
195+
196+
err = wait.For(conditions.New(client).PodPhaseMatch(pod, corev1.PodSucceeded), wait.WithTimeout(5*time.Minute))
197+
expect.NoError(err, "Workload scheduling failed on NodePool")
198+
199+
ginkgo.GinkgoLogr.Info("Workload scheduling validated - NodePool functional")
200+
})
201+
202+
ginkgo.It("should reject duplicate NodePool names", func(ctx context.Context) {
203+
ginkgo.By("Testing duplicate NodePool creation")
204+
205+
duplicateSpec := buildNodePoolSpec(testNodePoolName, clusterNamespace, "")
206+
duplicateObj := &unstructured.Unstructured{Object: duplicateSpec}
207+
208+
_, err := h.Dynamic().Resource(nodePoolGVR).Namespace(clusterNamespace).
209+
Create(ctx, duplicateObj, metav1.CreateOptions{})
210+
Expect(err).To(HaveOccurred(), "Should fail when creating NodePool with duplicate name")
211+
Expect(apierrors.IsAlreadyExists(err)).To(BeTrue(), "Expected AlreadyExists on duplicate NodePool")
212+
})
213+
214+
ginkgo.It("should reject operations on non-existent NodePool", func(ctx context.Context) {
215+
ginkgo.By("Testing access to non-existent NodePool")
216+
217+
_, err := h.Dynamic().Resource(nodePoolGVR).Namespace(clusterNamespace).
218+
Get(ctx, "non-existent-nodepool", metav1.GetOptions{})
219+
220+
Expect(err).To(HaveOccurred(), "Getting non-existent NodePool should fail")
221+
Expect(apierrors.IsNotFound(err)).To(BeTrue(), "Should return NotFound error")
222+
})
223+
})
224+
225+
func getClusterNamespace(h *helper.H) string {
	gvr := schema.GroupVersionResource{
		Group: "hypershift.openshift.io", Version: "v1beta1", Resource: "nodepools",
	}
	nps, err := h.Dynamic().Resource(gvr).List(context.Background(), metav1.ListOptions{})
	if err == nil && len(nps.Items) > 0 {
		return nps.Items[0].GetNamespace()
	}
	currentProject := h.CurrentProject()
	if strings.HasPrefix(currentProject, "clusters-") {
		return strings.TrimPrefix(currentProject, "clusters-")
	}

	return ""
}

func getExistingSubnet(ctx context.Context, h *helper.H, namespace string) string {
	nodePoolGVR := schema.GroupVersionResource{
		Group:    "hypershift.openshift.io",
		Version:  "v1beta1",
		Resource: "nodepools",
	}

	nodePoolList, err := h.Dynamic().Resource(nodePoolGVR).Namespace(namespace).
		List(ctx, metav1.ListOptions{})
	if err != nil || len(nodePoolList.Items) == 0 {
		return ""
	}

	for _, nodePool := range nodePoolList.Items {
		if spec, found, err := unstructured.NestedMap(nodePool.Object, "spec"); found && err == nil {
			if platform, found, err := unstructured.NestedMap(spec, "platform"); found && err == nil {
				if aws, found, err := unstructured.NestedMap(platform, "aws"); found && err == nil {
					if subnet, found, err := unstructured.NestedString(aws, "subnet"); found && err == nil {
						return subnet
					}
				}
			}
		}
	}
	return ""
}

func buildNodePoolSpec(name, namespace, subnet string) map[string]interface{} {
	awsConfig := map[string]interface{}{
		"instanceType": "m5.large",
	}

	if subnet != "" {
		awsConfig["subnet"] = subnet
	}

	return map[string]interface{}{
		"apiVersion": "hypershift.openshift.io/v1beta1",
		"kind":       "NodePool",
		"metadata": map[string]interface{}{
			"name":      name,
			"namespace": namespace,
		},
		"spec": map[string]interface{}{
			"clusterName": namespace,
			"replicas":    1,
			"management": map[string]interface{}{
				"autoRepair":  true,
				"upgradeType": "Replace",
			},
			"platform": map[string]interface{}{
				"aws": awsConfig,
			},
		},
	}
}

func detectSTSPermissionError(err error) bool {
	if err == nil {
		return false
	}

	errorMsg := strings.ToLower(err.Error())
	stsPatterns := []string{
		"accessdenied", "unauthorizedoperation", "forbidden",
		"invalid iam role", "sts permissions", "assumerolewithwebidentity",
	}

	for _, pattern := range stsPatterns {
		if strings.Contains(errorMsg, pattern) {
			return true
		}
	}
	return false
}

func isNodeReady(node corev1.Node) bool {
	for _, condition := range node.Status.Conditions {
		if condition.Type == corev1.NodeReady && condition.Status == corev1.ConditionTrue {
			return true
		}
	}
	return false
}
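
For quick reference, here is a minimal, standalone sketch (not part of the commit) of how the detectSTSPermissionError helper above classifies failures. The sample error strings are illustrative stand-ins, not real AWS or Kubernetes responses:

package main

import (
	"errors"
	"fmt"
	"strings"
)

// Mirrors the case-insensitive substring matching used by
// detectSTSPermissionError in the commit above.
func detectSTSPermissionError(err error) bool {
	if err == nil {
		return false
	}
	errorMsg := strings.ToLower(err.Error())
	for _, pattern := range []string{
		"accessdenied", "unauthorizedoperation", "forbidden",
		"invalid iam role", "sts permissions", "assumerolewithwebidentity",
	} {
		if strings.Contains(errorMsg, pattern) {
			return true
		}
	}
	return false
}

func main() {
	// Illustrative inputs only; a real credential failure is flagged,
	// while an unrelated API error (duplicate name) is not.
	samples := []error{
		errors.New("UnauthorizedOperation: not authorized to perform ec2:RunInstances"),
		errors.New(`nodepools.hypershift.openshift.io "test-1" already exists`),
	}
	for _, err := range samples {
		fmt.Printf("%v -> STS permission error: %v\n", err, detectSTSPermissionError(err))
	}
}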
