@@ -26,9 +26,20 @@ import (
26
26
// exception string if it does not think the condition should be fatal.
27
27
type exceptionCallback func (operator string , condition * configv1.ClusterOperatorStatusCondition , eventInterval monitorapi.Interval , clientConfig * rest.Config ) (string , error )
28
28
29
+ type upgradeWindowHolder struct {
30
+ startInterval * monitorapi.Interval
31
+ endInterval * monitorapi.Interval
32
+ }
33
+
29
34
func checkAuthenticationExceptions (condition * configv1.ClusterOperatorStatusCondition ) bool {
30
- if condition .Type == configv1 .OperatorAvailable && condition .Status == configv1 .ConditionFalse && (condition .Reason == "APIServices_Error" || condition .Reason == "APIServerDeployment_NoDeployment" || condition .Reason == "APIServerDeployment_NoPod" || condition .Reason == "APIServerDeployment_PreconditionNotFulfilled" || condition .Reason == "APIServices_PreconditionNotReady" || condition .Reason == "OAuthServerDeployment_NoDeployment" || condition .Reason == "OAuthServerRouteEndpointAccessibleController_EndpointUnavailable" || condition .Reason == "OAuthServerServiceEndpointAccessibleController_EndpointUnavailable" || condition .Reason == "WellKnown_NotReady" ) {
31
- return true
35
+ if condition .Type == configv1 .OperatorAvailable && condition .Status == configv1 .ConditionFalse {
36
+ switch condition .Reason {
37
+ case "APIServices_Error" , "APIServerDeployment_NoDeployment" , "APIServerDeployment_NoPod" ,
38
+ "APIServerDeployment_PreconditionNotFulfilled" , "APIServices_PreconditionNotReady" ,
39
+ "OAuthServerDeployment_NoDeployment" , "OAuthServerRouteEndpointAccessibleController_EndpointUnavailable" ,
40
+ "OAuthServerServiceEndpointAccessibleController_EndpointUnavailable" , "WellKnown_NotReady" :
41
+ return true
42
+ }
32
43
}
33
44
return false
34
45
}
@@ -45,17 +56,8 @@ func testStableSystemOperatorStateTransitions(events monitorapi.Intervals, clien
45
56
}
46
57
}
47
58
48
- isSingleNode , err := isSingleNodeCheck (clientConfig )
49
- if err != nil {
50
- logrus .Warnf ("Error checking for Single Node configuration on stable system (unable to make exception): %v" , err )
51
- isSingleNode = false
52
- }
53
-
54
59
// For the non-upgrade case, if any operator has Available=False, fail the test.
55
60
if condition .Type == configv1 .OperatorAvailable && condition .Status == configv1 .ConditionFalse {
56
- if isSingleNode {
57
- return "Operators are allowed to go degraded on single-node for now" , nil
58
- }
59
61
if operator == "authentication" {
60
62
if checkAuthenticationExceptions (condition ) {
61
63
return "https://issues.redhat.com/browse/OCPBUGS-20056" , nil
@@ -86,11 +88,7 @@ func isSingleNodeCheck(clientConfig *rest.Config) (bool, error) {
86
88
// UpgradeComplete and UpgradeFailed events end upgrade windows; if there was not an already started upgrade window,
87
89
// we ignore the event.
88
90
// If we don't find any upgrade ending point, we assume the ending point is at the end of the test.
89
- func isInUpgradeWindow (eventList monitorapi.Intervals , eventInterval monitorapi.Interval ) bool {
90
- type upgradeWindowHolder struct {
91
- startInterval * monitorapi.Interval
92
- endInterval * monitorapi.Interval
93
- }
91
+ func getUpgradeWindows (eventList monitorapi.Intervals ) []* upgradeWindowHolder {
94
92
95
93
var upgradeWindows []* upgradeWindowHolder
96
94
var currentWindow * upgradeWindowHolder
@@ -151,6 +149,10 @@ func isInUpgradeWindow(eventList monitorapi.Intervals, eventInterval monitorapi.
151
149
}
152
150
}
153
151
152
+ return upgradeWindows
153
+ }
154
+
155
+ func isInUpgradeWindow (upgradeWindows []* upgradeWindowHolder , eventInterval monitorapi.Interval ) bool {
154
156
for _ , upgradeWindow := range upgradeWindows {
155
157
if eventInterval .From .After (upgradeWindow .startInterval .From ) {
156
158
if upgradeWindow .endInterval == nil || eventInterval .To .Before (upgradeWindow .endInterval .To ) {
@@ -163,6 +165,13 @@ func isInUpgradeWindow(eventList monitorapi.Intervals, eventInterval monitorapi.
163
165
}
164
166
165
167
func testUpgradeOperatorStateTransitions (events monitorapi.Intervals , clientConfig * rest.Config ) []* junitapi.JUnitTestCase {
168
+ upgradeWindows := getUpgradeWindows (events )
169
+ isSingleNode , err := isSingleNodeCheck (clientConfig )
170
+ if err != nil {
171
+ logrus .Warnf ("Error checking for Single Node configuration on upgrade (unable to make exception): %v" , err )
172
+ isSingleNode = false
173
+ }
174
+
166
175
except := func (operator string , condition * configv1.ClusterOperatorStatusCondition , eventInterval monitorapi.Interval , clientConfig * rest.Config ) (string , error ) {
167
176
if condition .Status == configv1 .ConditionTrue {
168
177
if condition .Type == configv1 .OperatorAvailable {
@@ -178,35 +187,21 @@ func testUpgradeOperatorStateTransitions(events monitorapi.Intervals, clientConf
178
187
return "We are not worried about Degraded=True blips for update tests yet." , nil
179
188
}
180
189
181
- var availableEqualsFalseAllowed bool
182
- if condition .Type == configv1 .OperatorAvailable && condition .Status == configv1 .ConditionFalse {
183
- availableEqualsFalseAllowed = isInUpgradeWindow (events , eventInterval ) && eventInterval .To .Sub (eventInterval .From ) < 10 * time .Minute
184
- }
185
-
186
- isSingleNode , err := isSingleNodeCheck (clientConfig )
187
- if err != nil {
188
- logrus .Warnf ("Error checking for Single Node configuration on upgrade (unable to make exception): %v" , err )
189
- isSingleNode = false
190
- }
191
-
192
- // We'll add an exception for single node for now.
193
- if ! availableEqualsFalseAllowed {
194
-
195
- if isSingleNode {
196
- // We'll honor exceptions for single node configuration.
197
- logrus .Infof ("Operator %s is in Available=False state, but we give single node clusters an exception" , operator )
198
-
199
- } else if operator == "authentication" {
200
- // We'll honor exceptions for authentication operator because it is affected by etcd performance issues.
201
- logrus .Info ("Operator authentication is in Available=False state, but we give an exception" )
202
-
203
- } else if operator == "image-registry" {
204
- // For now, we'll honor exceptions for image-registry operator as it's affected by tests that
205
- // cause replicas to go down (e.g., tests that taint two nodes).
206
- logrus .Info ("Operator image-registry is in Available=False state, but we give an exception" )
207
- } else {
190
+ // we know the Status is not true and the Type is not degraded at this point indicating we are available=false
191
+ withinUpgradeWindowBuffer := isInUpgradeWindow (upgradeWindows , eventInterval ) && eventInterval .To .Sub (eventInterval .From ) < 10 * time .Minute
192
+ if ! withinUpgradeWindowBuffer {
193
+ switch operator {
194
+ // there are some known cases for authentication and image-registry that occur outside of upgrade window, so we will pass through and check for exceptions
195
+ case "authentication" , "image-registry" :
196
+ logrus .Infof ("Operator %s is in Available=False state outside of upgrade window, but we will check for exceptions" , operator )
197
+ default :
208
198
return "" , nil
209
199
}
200
+ } else {
201
+ // SingleNode is expected to go Available=False for most / all operators during upgrade
202
+ if isSingleNode {
203
+ return fmt .Sprintf ("Operator %s is in Available=False state running in single replica control plane, expected availability transition during upgrade" , operator ), nil
204
+ }
210
205
}
211
206
212
207
switch operator {
@@ -250,6 +245,8 @@ func testUpgradeOperatorStateTransitions(events monitorapi.Intervals, clientConf
250
245
return "https://issues.redhat.com/browse/OCPBUGS-23744" , nil
251
246
}
252
247
case "image-registry" :
248
+ // this won't handle the replicaCount==2 serial test where both pods are on nodes that get tainted.
249
+ // need to consider how we detect that or modify the job to set replicaCount==3
253
250
if replicaCount , _ := checkReplicas ("openshift-image-registry" , operator , clientConfig ); replicaCount == 1 {
254
251
return "https://issues.redhat.com/browse/OCPBUGS-22382" , nil
255
252
}
0 commit comments