Skip to content

Commit 7d0e358

Browse files
Merge pull request #1360 from jubittajohn/enable-safe-member-deletion-during-revision-rollout
OCPBUGS-17199: Separated the revision stability check from the bootstrap completeness check
2 parents 1390eb4 + 8ffe961 commit 7d0e358

File tree

4 files changed

+80
-50
lines changed

4 files changed

+80
-50
lines changed

pkg/operator/ceohelpers/bootstrap.go

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -107,16 +107,24 @@ func CheckSafeToScaleCluster(
107107
infraLister configv1listers.InfrastructureLister,
108108
etcdClient etcdcli.AllMemberLister) error {
109109

110-
bootstrapComplete, err := IsBootstrapComplete(configmapLister, staticPodClient, etcdClient)
110+
bootstrapComplete, err := IsBootstrapComplete(configmapLister, etcdClient)
111111
if err != nil {
112112
return fmt.Errorf("CheckSafeToScaleCluster failed to determine bootstrap status: %w", err)
113113
}
114-
115114
// while bootstrapping, scaling should be considered safe always
116115
if !bootstrapComplete {
117116
return nil
118117
}
119118

119+
revisionStable, err := IsRevisionStable(staticPodClient)
120+
if err != nil {
121+
return fmt.Errorf("CheckSafeToScaleCluster failed to determine stability of revisions: %w", err)
122+
}
123+
// when revision is stabilising, scaling should be considered safe always
124+
if !revisionStable {
125+
return nil
126+
}
127+
120128
scalingStrategy, err := GetBootstrapScalingStrategy(staticPodClient, namespaceLister, infraLister)
121129
if err != nil {
122130
return fmt.Errorf("CheckSafeToScaleCluster failed to get bootstrap scaling strategy: %w", err)
@@ -151,14 +159,30 @@ func CheckSafeToScaleCluster(
151159
}
152160

153161
// IsBootstrapComplete returns true if bootstrap has completed.
154-
func IsBootstrapComplete(configmapLister corev1listers.ConfigMapLister, staticPodClient v1helpers.StaticPodOperatorClient, etcdClient etcdcli.AllMemberLister) (bool, error) {
162+
func IsBootstrapComplete(configmapLister corev1listers.ConfigMapLister, etcdClient etcdcli.AllMemberLister) (bool, error) {
155163
// do a cheap check to see if the installer has marked
156164
// bootstrapping as done by creating the configmap first.
157165
if isBootstrapComplete, err := bootstrap.IsBootstrapComplete(configmapLister); !isBootstrapComplete || err != nil {
158166
return isBootstrapComplete, err
159167
}
160168

161-
// now run check to stability of revisions
169+
// check if etcd-bootstrap member is still present within the etcd cluster membership
170+
membersList, err := etcdClient.MemberList(context.Background())
171+
if err != nil {
172+
return false, fmt.Errorf("IsBootstrapComplete couldn't list the etcd cluster members: %w", err)
173+
}
174+
for _, m := range membersList {
175+
if m.Name == "etcd-bootstrap" {
176+
klog.V(4).Infof("(etcd-bootstrap) member is still present in the etcd cluster membership")
177+
return false, nil
178+
}
179+
}
180+
181+
return true, nil
182+
}
183+
184+
// IsRevisionStable checks the stability of revisions and returns true if the revision is stable
185+
func IsRevisionStable(staticPodClient v1helpers.StaticPodOperatorClient) (bool, error) {
162186
_, status, _, err := staticPodClient.GetStaticPodOperatorState()
163187
if err != nil {
164188
return false, fmt.Errorf("failed to get static pod operator state: %w", err)
@@ -168,19 +192,7 @@ func IsBootstrapComplete(configmapLister corev1listers.ConfigMapLister, staticPo
168192
}
169193
for _, curr := range status.NodeStatuses {
170194
if curr.CurrentRevision != status.LatestAvailableRevision {
171-
klog.V(4).Infof("bootstrap considered incomplete because revision %d is still in progress", status.LatestAvailableRevision)
172-
return false, nil
173-
}
174-
}
175-
176-
// check if etcd-bootstrap member is still present within the etcd cluster membership
177-
membersList, err := etcdClient.MemberList(context.Background())
178-
if err != nil {
179-
return false, fmt.Errorf("IsBootstrapComplete couldn't list the etcd cluster members: %w", err)
180-
}
181-
for _, m := range membersList {
182-
if m.Name == "etcd-bootstrap" {
183-
klog.V(4).Infof("(etcd-bootstrap) member is still present in the etcd cluster membership")
195+
klog.V(4).Infof("revision stability check failed because revision %d is still in progress", status.LatestAvailableRevision)
184196
return false, nil
185197
}
186198
}

pkg/operator/ceohelpers/bootstrap_test.go

Lines changed: 42 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -164,56 +164,36 @@ func Test_GetBootstrapScalingStrategy(t *testing.T) {
164164
func Test_IsBootstrapComplete(t *testing.T) {
165165
tests := map[string]struct {
166166
bootstrapConfigMap *corev1.ConfigMap
167-
nodes []operatorv1.NodeStatus
168167
etcdMembers []*etcdserverpb.Member
169168
expectComplete bool
170169
expectError error
171170
}{
172-
"bootstrap complete, nodes up to date": {
171+
"bootstrap complete, configmap status is complete": {
173172
bootstrapConfigMap: bootstrapComplete,
174-
nodes: twoNodesAtCurrentRevision,
175173
etcdMembers: u.DefaultEtcdMembers(),
176174
expectComplete: true,
177175
expectError: nil,
178176
},
179-
"bootstrap progressing, nodes up to date": {
177+
"bootstrap progressing, configmap status is progressing": {
180178
bootstrapConfigMap: bootstrapProgressing,
181-
nodes: twoNodesAtCurrentRevision,
182179
etcdMembers: u.DefaultEtcdMembers(),
183180
expectComplete: false,
184181
expectError: nil,
185182
},
186183
"bootstrap configmap missing": {
187184
bootstrapConfigMap: nil,
188-
nodes: twoNodesAtCurrentRevision,
189-
etcdMembers: u.DefaultEtcdMembers(),
190-
expectComplete: false,
191-
expectError: nil,
192-
},
193-
"bootstrap complete, no recorded revisions": {
194-
bootstrapConfigMap: bootstrapComplete,
195-
nodes: zeroNodesAtAnyRevision,
196-
etcdMembers: u.DefaultEtcdMembers(),
197-
expectComplete: true,
198-
expectError: nil,
199-
},
200-
"bootstrap complete, node progressing": {
201-
bootstrapConfigMap: bootstrapComplete,
202-
nodes: twoNodesProgressingTowardsCurrentRevision,
203185
etcdMembers: u.DefaultEtcdMembers(),
204186
expectComplete: false,
205187
expectError: nil,
206188
},
207189
"bootstrap complete, etcd-bootstrap removed": {
208190
bootstrapConfigMap: bootstrapComplete,
209-
nodes: twoNodesAtCurrentRevision,
210191
etcdMembers: u.DefaultEtcdMembers(),
211192
expectComplete: true,
212193
expectError: nil,
213194
},
214195
"bootstrap complete, etcd-bootstrap exists": {
215196
bootstrapConfigMap: bootstrapComplete,
216-
nodes: twoNodesAtCurrentRevision,
217197
etcdMembers: append(u.DefaultEtcdMembers(), u.FakeEtcdBootstrapMember(0)),
218198
expectComplete: false,
219199
expectError: nil,
@@ -230,25 +210,58 @@ func Test_IsBootstrapComplete(t *testing.T) {
230210
}
231211
fakeConfigMapLister := corev1listers.NewConfigMapLister(indexer)
232212

233-
operatorStatus := &operatorv1.StaticPodOperatorStatus{
234-
OperatorStatus: operatorv1.OperatorStatus{LatestAvailableRevision: 1},
235-
NodeStatuses: test.nodes,
236-
}
237-
fakeStaticPodClient := v1helpers.NewFakeStaticPodOperatorClient(nil, operatorStatus, nil, nil)
238-
239213
fakeEtcdClient, err := etcdcli.NewFakeEtcdClient(test.etcdMembers)
240214
if err != nil {
241215
t.Fatal(err)
242216
}
243217

244-
actualComplete, actualErr := IsBootstrapComplete(fakeConfigMapLister, fakeStaticPodClient, fakeEtcdClient)
218+
actualComplete, actualErr := IsBootstrapComplete(fakeConfigMapLister, fakeEtcdClient)
245219

246220
assert.Equal(t, test.expectComplete, actualComplete)
247221
assert.Equal(t, test.expectError, actualErr)
248222
})
249223
}
250224
}
251225

226+
func Test_IsRevisionStable(t *testing.T) {
227+
tests := map[string]struct {
228+
nodes []operatorv1.NodeStatus
229+
expectedStability bool
230+
expectedError error
231+
}{
232+
"is revision stable, node progressing": {
233+
nodes: twoNodesProgressingTowardsCurrentRevision,
234+
expectedStability: false,
235+
expectedError: nil,
236+
},
237+
"is revision stable, nodes up to date": {
238+
nodes: twoNodesAtCurrentRevision,
239+
expectedStability: true,
240+
expectedError: nil,
241+
},
242+
"bootstrap complete, no recorded revisions": {
243+
nodes: zeroNodesAtAnyRevision,
244+
expectedStability: true,
245+
expectedError: nil,
246+
},
247+
}
248+
for name, test := range tests {
249+
t.Run(name, func(t *testing.T) {
250+
operatorStatus := &operatorv1.StaticPodOperatorStatus{
251+
OperatorStatus: operatorv1.OperatorStatus{LatestAvailableRevision: 1},
252+
NodeStatuses: test.nodes,
253+
}
254+
fakeStaticPodClient := v1helpers.NewFakeStaticPodOperatorClient(nil, operatorStatus, nil, nil)
255+
256+
actualStability, actualErr := IsRevisionStable(fakeStaticPodClient)
257+
258+
assert.Equal(t, test.expectedStability, actualStability)
259+
assert.Equal(t, test.expectedError, actualErr)
260+
261+
})
262+
}
263+
}
264+
252265
func Test_CheckSafeToScaleCluster(t *testing.T) {
253266
tests := map[string]struct {
254267
namespace *corev1.Namespace

pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ func NewClusterMemberRemovalController(
101101

102102
func (c *clusterMemberRemovalController) sync(ctx context.Context, syncCtx factory.SyncContext) error {
103103
// skip reconciling this controller, if bootstrapping is not completed
104-
bootstrapComplete, err := ceohelpers.IsBootstrapComplete(c.configMapLister, c.operatorClient, c.etcdClient)
104+
bootstrapComplete, err := ceohelpers.IsBootstrapComplete(c.configMapLister, c.etcdClient)
105105
if err != nil {
106106
return fmt.Errorf("IsBootstrapComplete failed to determine bootstrap status: %w", err)
107107
}

pkg/operator/etcdendpointscontroller/etcdendpointscontroller.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,13 +92,18 @@ func (c *EtcdEndpointsController) syncConfigMap(ctx context.Context, recorder ev
9292
// forward or remove it if possible so clients can forget about it.
9393
if existing, err := c.configmapLister.ConfigMaps(operatorclient.TargetNamespace).Get("etcd-endpoints"); err == nil && existing != nil {
9494
if existingIP, hasExistingIP := existing.Annotations[etcdcli.BootstrapIPAnnotationKey]; hasExistingIP {
95-
bootstrapComplete, err := ceohelpers.IsBootstrapComplete(c.configmapLister, c.operatorClient, c.etcdClient)
95+
bootstrapComplete, err := ceohelpers.IsBootstrapComplete(c.configmapLister, c.etcdClient)
9696
if err != nil {
9797
return fmt.Errorf("couldn't determine bootstrap status: %w", err)
9898
}
9999

100-
if bootstrapComplete {
101-
// rename the annotation once we're done with bootstrapping
100+
revisionStable, err := ceohelpers.IsRevisionStable(c.operatorClient)
101+
if err != nil {
102+
return fmt.Errorf("couldn't determine stability of revisions: %w", err)
103+
}
104+
105+
if bootstrapComplete && revisionStable {
106+
// rename the annotation once we're done with bootstrapping and the revision is stable
102107
required.Annotations[etcdcli.BootstrapIPAnnotationKey+"-"] = existingIP
103108
} else {
104109
required.Annotations[etcdcli.BootstrapIPAnnotationKey] = existingIP

0 commit comments

Comments
 (0)