package verify

import (
	"context"
	"fmt"
	"strings"
	"time"

	"github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"

	viper "github.com/openshift/osde2e/pkg/common/concurrentviper"
	"github.com/openshift/osde2e/pkg/common/config"
	"github.com/openshift/osde2e/pkg/common/expect"
	"github.com/openshift/osde2e/pkg/common/helper"
	"github.com/openshift/osde2e/pkg/common/label"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"sigs.k8s.io/e2e-framework/klient/k8s/resources"
	"sigs.k8s.io/e2e-framework/klient/wait"
	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
)

const (
	nodePoolLabel      = "hypershift.openshift.io/nodePool"
	hostedClusterLabel = "hypershift.openshift.io/hosted-cluster"
)

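// This suite provisions a short-lived NodePool on an AWS HyperShift cluster to
// prove that the cluster's STS role carries the EC2 permissions NodePools need.
// Failures that look like missing permissions are reported as release blockers
// rather than generic test failures.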
var _ = ginkgo.Describe("[Suite: e2e] NodePool STS Permissions", ginkgo.Ordered, label.HyperShift, label.E2E, func() {
	var h *helper.H
	var client *resources.Resources
	var clusterNamespace string
	var testNodePoolName string

	nodePoolGVR := schema.GroupVersionResource{
		Group:    "hypershift.openshift.io",
		Version:  "v1beta1",
		Resource: "nodepools",
	}

	ginkgo.BeforeAll(func() {
		if !viper.GetBool(config.Hypershift) {
			ginkgo.Skip("NodePool tests are only supported on HyperShift clusters")
		}

		if viper.GetString(config.CloudProvider.CloudProviderID) != "aws" {
			ginkgo.Skip("NodePool STS tests are only supported on AWS HyperShift clusters")
		}

		h = helper.New()
		client = h.AsUser("")

		clusterNamespace = getClusterNamespace(h)
		if clusterNamespace == "" {
			ginkgo.Fail("Could not determine cluster namespace - cluster may not be properly configured")
		}

		// Unix-timestamp suffix keeps concurrent runs from colliding on the name.
		testNodePoolName = fmt.Sprintf("test-%d", time.Now().Unix()%100000)
	})

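	// Best-effort teardown: delete the test NodePool, then poll until the API
	// server confirms it is gone so later suites start from a clean slate.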
	ginkgo.AfterAll(func() {
		ctx := context.Background()
		ginkgo.By("Cleaning up test NodePool")

		err := h.Dynamic().Resource(nodePoolGVR).Namespace(clusterNamespace).
			Delete(ctx, testNodePoolName, metav1.DeleteOptions{})
		if err != nil && !apierrors.IsNotFound(err) {
			ginkgo.GinkgoLogr.Error(err, "Failed to clean up test NodePool", "name", testNodePoolName)
		}
		_ = wait.For(func(ctx context.Context) (bool, error) {
			_, err := h.Dynamic().Resource(nodePoolGVR).Namespace(clusterNamespace).
				Get(ctx, testNodePoolName, metav1.GetOptions{})
			return apierrors.IsNotFound(err), nil
		}, wait.WithTimeout(3*time.Minute), wait.WithInterval(5*time.Second))
	})

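	// A NodePool create rejected by cloud-credential checks is the earliest,
	// cheapest signal that the STS role is missing EC2 permissions.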
	ginkgo.It("should create NodePool with proper STS permissions", func(ctx context.Context) {
		ginkgo.By("Creating test NodePool to validate STS permissions")

		subnet := getExistingSubnet(ctx, h, clusterNamespace)
		nodePoolSpec := buildNodePoolSpec(testNodePoolName, clusterNamespace, subnet)
		nodePoolObj := &unstructured.Unstructured{Object: nodePoolSpec}

		_, err := h.Dynamic().Resource(nodePoolGVR).Namespace(clusterNamespace).
			Create(ctx, nodePoolObj, metav1.CreateOptions{})

		if detectSTSPermissionError(err) {
			expect.NoError(fmt.Errorf(
				"STS_PERMISSION_ERROR: NodePool creation failed due to missing AWS permissions (ec2:RunInstances, ec2:CreateTags): %v. "+
					"This blocks release until STS policies are updated", err))
		}
		expect.NoError(err, "NodePool creation failed")

		ginkgo.GinkgoLogr.Info("NodePool created successfully - STS permissions validated",
			"nodepool", testNodePoolName, "permissions", []string{"ec2:RunInstances", "ec2:CreateTags"})
	})

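	// Creation succeeding only proves the API accepted the spec; the actual EC2
	// instance launch is what exercises ec2:RunInstances, so wait for capacity.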
	ginkgo.It("should provision nodes with AWS integration", func(ctx context.Context) {
		ginkgo.By("Waiting for NodePool to become Ready")

		err := wait.For(func(ctx context.Context) (bool, error) {
			np, err := h.Dynamic().Resource(nodePoolGVR).Namespace(clusterNamespace).
				Get(ctx, testNodePoolName, metav1.GetOptions{})
			if err != nil {
				return false, err
			}
			ready, _, _ := unstructured.NestedInt64(np.Object, "status", "readyReplicas")
			return ready >= 1, nil
		}, wait.WithTimeout(10*time.Minute), wait.WithInterval(15*time.Second))
		expect.NoError(err, "NodePool did not report ready replicas")

		ginkgo.By("Waiting for nodes to be provisioned")

		var testNode *corev1.Node
		err = wait.For(func(ctx context.Context) (bool, error) {
			var nodeList corev1.NodeList
			if err := client.List(ctx, &nodeList); err != nil {
				return false, err
			}

			for i := range nodeList.Items {
				node := &nodeList.Items[i]
				if labelValue, exists := node.Labels[nodePoolLabel]; exists {
					if strings.Contains(labelValue, testNodePoolName) && isNodeReady(*node) {
						// Take the address of the slice element, not the range
						// variable, so testNode stays valid after the loop.
						testNode = node
						return true, nil
					}
				}
			}
			return false, nil
		}, wait.WithTimeout(15*time.Minute), wait.WithInterval(30*time.Second))

		if detectSTSPermissionError(err) {
			expect.NoError(fmt.Errorf(
				"STS_PERMISSION_ERROR: Node provisioning failed due to missing AWS permissions (ec2:RunInstances): %v. "+
					"This blocks release until STS policies are updated", err))
		}
		expect.NoError(err, "NodePool failed to provision nodes")

		ginkgo.By("Validating AWS integration")
		Expect(testNode).ToNot(BeNil(), "Test node should be available")

		// Validate AWS provider ID
		Expect(testNode.Spec.ProviderID).To(HavePrefix("aws://"),
			"Node should have AWS provider ID - ec2:DescribeInstances permission may be missing")

		hasInternalIP := false
		for _, addr := range testNode.Status.Addresses {
			if addr.Type == corev1.NodeInternalIP {
				hasInternalIP = true
				break
			}
		}
		Expect(hasInternalIP).To(BeTrue(), "Node should have InternalIP")

		ginkgo.GinkgoLogr.Info("Node provisioning validated - STS permissions working",
			"node", testNode.Name, "permissions", []string{"ec2:RunInstances", "ec2:DescribeInstances"})
	})

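	// A pod pinned to the new nodes confirms the NodePool is schedulable end to
	// end, not merely registered with the API server.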
	ginkgo.It("should schedule workloads on NodePool nodes", func(ctx context.Context) {
		ginkgo.By("Creating test workload targeted at NodePool")

		pod := &corev1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				GenerateName: "nodepool-test-",
				Namespace:    h.CurrentProject(),
			},
			Spec: corev1.PodSpec{
				NodeSelector: map[string]string{
					nodePoolLabel: fmt.Sprintf("%s-%s", clusterNamespace, testNodePoolName),
				},
				Containers: []corev1.Container{{
					Name:    "test",
					Image:   "registry.access.redhat.com/ubi8/ubi-minimal",
					Command: []string{"/bin/sh", "-c", "echo 'NodePool test successful' && sleep 5"},
					Resources: corev1.ResourceRequirements{
						Requests: corev1.ResourceList{
							corev1.ResourceCPU:    resource.MustParse("100m"),
							corev1.ResourceMemory: resource.MustParse("128Mi"),
						},
					},
				}},
				RestartPolicy: corev1.RestartPolicyNever,
			},
		}

		err := client.Create(ctx, pod)
		expect.NoError(err, "Failed to create test pod")
		defer func() { _ = client.Delete(ctx, pod) }()

		ginkgo.By("Waiting for workload to complete successfully")

		err = wait.For(conditions.New(client).PodPhaseMatch(pod, corev1.PodSucceeded), wait.WithTimeout(5*time.Minute))
		expect.NoError(err, "Workload scheduling failed on NodePool")

		ginkgo.GinkgoLogr.Info("Workload scheduling validated - NodePool functional")
	})

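	// Negative path: a second create with the same name must be rejected with
	// AlreadyExists rather than silently replacing the NodePool.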
	ginkgo.It("should reject duplicate NodePool names", func(ctx context.Context) {
		ginkgo.By("Testing duplicate NodePool creation")

		duplicateSpec := buildNodePoolSpec(testNodePoolName, clusterNamespace, "")
		duplicateObj := &unstructured.Unstructured{Object: duplicateSpec}

		_, err := h.Dynamic().Resource(nodePoolGVR).Namespace(clusterNamespace).
			Create(ctx, duplicateObj, metav1.CreateOptions{})
		Expect(err).To(HaveOccurred(), "Should fail when creating NodePool with duplicate name")
		Expect(apierrors.IsAlreadyExists(err)).To(BeTrue(), "Expected AlreadyExists error on duplicate NodePool")
	})

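	// Negative path: a Get on a name that was never created must return
	// NotFound, not a permission error.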
	ginkgo.It("should reject operations on non-existent NodePool", func(ctx context.Context) {
		ginkgo.By("Testing access to non-existent NodePool")

		_, err := h.Dynamic().Resource(nodePoolGVR).Namespace(clusterNamespace).
			Get(ctx, "non-existent-nodepool", metav1.GetOptions{})

		Expect(err).To(HaveOccurred(), "Getting non-existent NodePool should fail")
		Expect(apierrors.IsNotFound(err)).To(BeTrue(), "Should return NotFound error")
	})
})

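// getClusterNamespace determines the namespace holding this cluster's NodePool
// resources, preferring the namespace of any NodePool it can list and falling
// back to deriving it from the current "clusters-" prefixed project; it returns
// an empty string when neither approach works.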
func getClusterNamespace(h *helper.H) string {
	gvr := schema.GroupVersionResource{
		Group: "hypershift.openshift.io", Version: "v1beta1", Resource: "nodepools",
	}
	nps, err := h.Dynamic().Resource(gvr).List(context.Background(), metav1.ListOptions{})
	if err == nil && len(nps.Items) > 0 {
		return nps.Items[0].GetNamespace()
	}
	currentProject := h.CurrentProject()
	if strings.HasPrefix(currentProject, "clusters-") {
		return strings.TrimPrefix(currentProject, "clusters-")
	}

	return ""
}

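// getExistingSubnet copies the AWS subnet from any pre-existing NodePool so the
// test NodePool lands in a subnet already known to work, or "" if none is found.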
func getExistingSubnet(ctx context.Context, h *helper.H, namespace string) string {
	nodePoolGVR := schema.GroupVersionResource{
		Group:    "hypershift.openshift.io",
		Version:  "v1beta1",
		Resource: "nodepools",
	}

	nodePoolList, err := h.Dynamic().Resource(nodePoolGVR).Namespace(namespace).
		List(ctx, metav1.ListOptions{})
	if err != nil || len(nodePoolList.Items) == 0 {
		return ""
	}

	for _, nodePool := range nodePoolList.Items {
		// NestedString walks the full spec.platform.aws.subnet path in one call,
		// replacing the previous four levels of nested map lookups.
		if subnet, found, err := unstructured.NestedString(nodePool.Object, "spec", "platform", "aws", "subnet"); found && err == nil {
			return subnet
		}
	}
	return ""
}

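// buildNodePoolSpec assembles a minimal unstructured NodePool manifest; the
// subnet is only set when one was discovered, otherwise HyperShift picks a
// default.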
func buildNodePoolSpec(name, namespace, subnet string) map[string]interface{} {
	awsConfig := map[string]interface{}{
		"instanceType": "m5.large",
	}

	if subnet != "" {
		awsConfig["subnet"] = subnet
	}

	return map[string]interface{}{
		"apiVersion": "hypershift.openshift.io/v1beta1",
		"kind":       "NodePool",
		"metadata": map[string]interface{}{
			"name":      name,
			"namespace": namespace,
		},
		"spec": map[string]interface{}{
			"clusterName": namespace,
			// Unstructured objects only tolerate int64 (not int) numeric
			// values; a plain int can panic in DeepCopy.
			"replicas": int64(1),
			"management": map[string]interface{}{
				"autoRepair":  true,
				"upgradeType": "Replace",
			},
			"platform": map[string]interface{}{
				"aws": awsConfig,
			},
		},
	}
}

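// detectSTSPermissionError reports whether err looks like an AWS STS/IAM
// permission failure, matched case-insensitively against common error phrases.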
func detectSTSPermissionError(err error) bool {
	if err == nil {
		return false
	}

	errorMsg := strings.ToLower(err.Error())
	stsPatterns := []string{
		"accessdenied", "unauthorizedoperation", "forbidden",
		"invalid iam role", "sts permissions", "assumerolewithwebidentity",
	}

	for _, pattern := range stsPatterns {
		if strings.Contains(errorMsg, pattern) {
			return true
		}
	}
	return false
}

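// isNodeReady reports whether the node's Ready condition is True.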
func isNodeReady(node corev1.Node) bool {
	for _, condition := range node.Status.Conditions {
		if condition.Type == corev1.NodeReady && condition.Status == corev1.ConditionTrue {
			return true
		}
	}
	return false
}