Skip to content

Commit 0931c72

Browse files
committed
Fix issues with oc adm migrate authorization
This change handles a number of bugs with the migrate authorization command: 1. Rework MigrateAuthorizationOptions.checkParity to be a MigrateActionFunc instead of a MigrateVisitFunc. This allows us to take advantage of migrate's default retry handling. 2. Add proper retry logic to checkParity, and have all check* funcs return the appropriate TemporaryError based on the situation. This coupled with migrate's existing retry logic makes the command resilient against common errors such as the deletion of resources. 3. Remove the binding of the standard migrate flags from migrate authorization. This command supports no parameters, and exposing the standard migrate parameters allows the user to accidentally break how the command runs. 4. Fix GroupVersion constants used for discovery based gating. They were incorrectly set to the internal version instead of v1. This would cause the policy based gating to always think that the server did not support policy objects. 5. Force RBAC client to use v1beta1 since that is the only version supported by a 3.6 server. This allows you to use a 3.9 client against a 3.6 server. 6. Remove rate limiting from the RBAC client to fix BZ 1513139. Only a cluster admin can interact with RBAC resources on a 3.6 server, so this will quickly error out if run by a non-privileged user. Bug 1513139 Signed-off-by: Monis Khan <[email protected]>
1 parent a331b46 commit 0931c72

File tree

4 files changed

+133
-72
lines changed

4 files changed

+133
-72
lines changed

pkg/cmd/admin/migrate/authorization/authorization.go

Lines changed: 117 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@ import (
55
"fmt"
66
"io"
77

8+
kerrs "k8s.io/apimachinery/pkg/api/errors"
89
"k8s.io/apimachinery/pkg/apis/meta/v1"
9-
"k8s.io/apimachinery/pkg/runtime"
10-
utilerrors "k8s.io/apimachinery/pkg/util/errors"
10+
"k8s.io/client-go/util/flowcontrol"
11+
"k8s.io/kubernetes/pkg/apis/rbac/v1beta1"
1112
rbacinternalversion "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/rbac/internalversion"
1213
kcmdutil "k8s.io/kubernetes/pkg/kubectl/cmd/util"
1314
"k8s.io/kubernetes/pkg/kubectl/resource"
@@ -39,7 +40,8 @@ var (
3940
4041
No resources are mutated.`)
4142

42-
errOutOfSync = errors.New("is not in sync with RBAC")
43+
// errOutOfSync is retriable since it could be caused by the controller lagging behind
44+
errOutOfSync = migrate.ErrRetriable{errors.New("is not in sync with RBAC")}
4345
)
4446

4547
type MigrateAuthorizationOptions struct {
@@ -54,6 +56,7 @@ func NewCmdMigrateAuthorization(name, fullName string, f *clientcmd.Factory, in
5456
Out: out,
5557
ErrOut: errout,
5658
AllNamespaces: true,
59+
Confirm: true, // force our save function to always run (it is read only)
5760
Include: []string{
5861
"clusterroles.authorization.openshift.io",
5962
"roles.authorization.openshift.io",
@@ -80,20 +83,40 @@ func (o *MigrateAuthorizationOptions) Complete(name string, f *clientcmd.Factory
8083
return fmt.Errorf("%s takes no positional arguments", name)
8184
}
8285

86+
o.ResourceOptions.SaveFn = o.checkParity
8387
if err := o.ResourceOptions.Complete(f, c); err != nil {
8488
return err
8589
}
8690

87-
_, kclient, err := f.Clients()
91+
discovery, err := f.DiscoveryClient()
8892
if err != nil {
8993
return err
9094
}
9195

92-
if err := clientcmd.LegacyPolicyResourceGate(kclient.Discovery()); err != nil {
96+
if err := clientcmd.LegacyPolicyResourceGate(discovery); err != nil {
9397
return err
9498
}
9599

96-
o.rbac = kclient.Rbac()
100+
config, err := f.ClientConfig()
101+
if err != nil {
102+
return err
103+
}
104+
105+
// do not rate limit this client because it has to scan all RBAC data across the cluster
106+
// this is safe because only a cluster admin will have the ability to read these objects
107+
configShallowCopy := *config
108+
configShallowCopy.RateLimiter = flowcontrol.NewFakeAlwaysRateLimiter()
109+
110+
// This command is only compatible with a 3.6 server, which only supported RBAC v1beta1
111+
// Thus we must force that GV otherwise the client will default to v1
112+
gv := v1beta1.SchemeGroupVersion
113+
configShallowCopy.GroupVersion = &gv
114+
115+
rbac, err := rbacinternalversion.NewForConfig(&configShallowCopy)
116+
if err != nil {
117+
return err
118+
}
119+
o.rbac = rbac
97120

98121
return nil
99122
}
@@ -103,130 +126,159 @@ func (o MigrateAuthorizationOptions) Validate() error {
103126
}
104127

105128
func (o MigrateAuthorizationOptions) Run() error {
106-
return o.ResourceOptions.Visitor().Visit(func(info *resource.Info) (migrate.Reporter, error) {
107-
return o.checkParity(info.Object)
108-
})
129+
// we lie and say this object has changed so our save function will run
130+
return o.ResourceOptions.Visitor().Visit(migrate.AlwaysRequiresMigration)
109131
}
110132

111133
// checkParity confirms that Openshift authorization objects are in sync with Kubernetes RBAC
112134
// and returns an error if they are out of sync or if it encounters a conversion error
113-
func (o *MigrateAuthorizationOptions) checkParity(obj runtime.Object) (migrate.Reporter, error) {
114-
var errlist []error
115-
switch t := obj.(type) {
135+
func (o *MigrateAuthorizationOptions) checkParity(info *resource.Info, _ migrate.Reporter) error {
136+
var err migrate.TemporaryError
137+
138+
switch t := info.Object.(type) {
116139
case *authorizationapi.ClusterRole:
117-
errlist = append(errlist, o.checkClusterRole(t)...)
140+
err = o.checkClusterRole(t)
118141
case *authorizationapi.Role:
119-
errlist = append(errlist, o.checkRole(t)...)
142+
err = o.checkRole(t)
120143
case *authorizationapi.ClusterRoleBinding:
121-
errlist = append(errlist, o.checkClusterRoleBinding(t)...)
144+
err = o.checkClusterRoleBinding(t)
122145
case *authorizationapi.RoleBinding:
123-
errlist = append(errlist, o.checkRoleBinding(t)...)
146+
err = o.checkRoleBinding(t)
124147
default:
125-
return nil, nil // indicate that we ignored the object
148+
// this should never happen unless o.Include or the server is broken
149+
return fmt.Errorf("impossible type %T for checkParity info=%#v object=%#v", t, info, t)
150+
}
151+
152+
// We encountered no error, so this object is in sync.
153+
if err == nil {
154+
// we only perform read operations so we return this error to signal that we did not change anything
155+
return migrate.ErrUnchanged
156+
}
157+
158+
// At this point we know that we have some non-nil TemporaryError.
159+
// If it has the possibility of being transient, we need to sync ourselves with the current state of the object.
160+
if err.Temporary() {
161+
// The most likely cause is that an authorization object was deleted after we initially fetched it,
162+
// and the controller deleted the associated RBAC object, which caused our RBAC GET to fail.
163+
// We can confirm this by refetching the authorization object.
164+
refreshErr := info.Get()
165+
if refreshErr != nil {
166+
// Our refresh GET for this authorization object failed.
167+
// The default logic for migration errors is appropriate in this case (refreshErr is most likely a NotFound).
168+
return migrate.DefaultRetriable(info, refreshErr)
169+
}
170+
// We had no refreshErr, but encountered some other possibly transient error.
171+
// No special handling is required in this case, we just pass it through below.
126172
}
127-
return migrate.NotChanged, utilerrors.NewAggregate(errlist) // we only perform read operations
173+
174+
// All of the check* funcs return an appropriate TemporaryError based on the failure,
175+
// so we can pass that through to the default migration logic which will retry as needed.
176+
return err
128177
}
129178

130-
func (o *MigrateAuthorizationOptions) checkClusterRole(originClusterRole *authorizationapi.ClusterRole) []error {
131-
var errlist []error
179+
// handleRBACGetError signals for a retry on NotFound (handles deletion and sync lag)
180+
// and ServerTimeout (handles heavy load against the server).
181+
func handleRBACGetError(err error) migrate.TemporaryError {
182+
switch {
183+
case kerrs.IsNotFound(err), kerrs.IsServerTimeout(err):
184+
return migrate.ErrRetriable{err}
185+
default:
186+
return migrate.ErrNotRetriable{err}
187+
}
188+
}
132189

190+
func (o *MigrateAuthorizationOptions) checkClusterRole(originClusterRole *authorizationapi.ClusterRole) migrate.TemporaryError {
133191
// convert the origin role to a rbac role
134192
convertedClusterRole, err := authorizationsync.ConvertToRBACClusterRole(originClusterRole)
135193
if err != nil {
136-
errlist = append(errlist, err)
194+
// conversion errors should basically never happen, so we do not attempt to retry on those
195+
return migrate.ErrNotRetriable{err}
137196
}
138197

139198
// try to get the equivalent rbac role from the api
140199
rbacClusterRole, err := o.rbac.ClusterRoles().Get(originClusterRole.Name, v1.GetOptions{})
141200
if err != nil {
142-
errlist = append(errlist, err)
201+
// it is possible that the controller has not synced this yet
202+
return handleRBACGetError(err)
143203
}
144204

145-
// compare the results if there have been no errors so far
146-
if len(errlist) == 0 {
147-
// if they are not equal, something has gone wrong and the two objects are not in sync
148-
if authorizationsync.PrepareForUpdateClusterRole(convertedClusterRole, rbacClusterRole) {
149-
errlist = append(errlist, errOutOfSync)
150-
}
205+
// if they are not equal, something has gone wrong and the two objects are not in sync
206+
if authorizationsync.PrepareForUpdateClusterRole(convertedClusterRole, rbacClusterRole) {
207+
// we retry on this since it could be caused by the controller lagging behind
208+
return errOutOfSync
151209
}
152210

153-
return errlist
211+
return nil
154212
}
155213

156-
func (o *MigrateAuthorizationOptions) checkRole(originRole *authorizationapi.Role) []error {
157-
var errlist []error
158-
214+
func (o *MigrateAuthorizationOptions) checkRole(originRole *authorizationapi.Role) migrate.TemporaryError {
159215
// convert the origin role to a rbac role
160216
convertedRole, err := authorizationsync.ConvertToRBACRole(originRole)
161217
if err != nil {
162-
errlist = append(errlist, err)
218+
// conversion errors should basically never happen, so we do not attempt to retry on those
219+
return migrate.ErrNotRetriable{err}
163220
}
164221

165222
// try to get the equivalent rbac role from the api
166223
rbacRole, err := o.rbac.Roles(originRole.Namespace).Get(originRole.Name, v1.GetOptions{})
167224
if err != nil {
168-
errlist = append(errlist, err)
225+
// it is possible that the controller has not synced this yet
226+
return handleRBACGetError(err)
169227
}
170228

171-
// compare the results if there have been no errors so far
172-
if len(errlist) == 0 {
173-
// if they are not equal, something has gone wrong and the two objects are not in sync
174-
if authorizationsync.PrepareForUpdateRole(convertedRole, rbacRole) {
175-
errlist = append(errlist, errOutOfSync)
176-
}
229+
// if they are not equal, something has gone wrong and the two objects are not in sync
230+
if authorizationsync.PrepareForUpdateRole(convertedRole, rbacRole) {
231+
// we retry on this since it could be caused by the controller lagging behind
232+
return errOutOfSync
177233
}
178234

179-
return errlist
235+
return nil
180236
}
181237

182-
func (o *MigrateAuthorizationOptions) checkClusterRoleBinding(originRoleBinding *authorizationapi.ClusterRoleBinding) []error {
183-
var errlist []error
184-
238+
func (o *MigrateAuthorizationOptions) checkClusterRoleBinding(originRoleBinding *authorizationapi.ClusterRoleBinding) migrate.TemporaryError {
185239
// convert the origin role binding to a rbac role binding
186240
convertedRoleBinding, err := authorizationsync.ConvertToRBACClusterRoleBinding(originRoleBinding)
187241
if err != nil {
188-
errlist = append(errlist, err)
242+
// conversion errors should basically never happen, so we do not attempt to retry on those
243+
return migrate.ErrNotRetriable{err}
189244
}
190245

191246
// try to get the equivalent rbac role binding from the api
192247
rbacRoleBinding, err := o.rbac.ClusterRoleBindings().Get(originRoleBinding.Name, v1.GetOptions{})
193248
if err != nil {
194-
errlist = append(errlist, err)
249+
// it is possible that the controller has not synced this yet
250+
return handleRBACGetError(err)
195251
}
196252

197-
// compare the results if there have been no errors so far
198-
if len(errlist) == 0 {
199-
// if they are not equal, something has gone wrong and the two objects are not in sync
200-
if authorizationsync.PrepareForUpdateClusterRoleBinding(convertedRoleBinding, rbacRoleBinding) {
201-
errlist = append(errlist, errOutOfSync)
202-
}
253+
// if they are not equal, something has gone wrong and the two objects are not in sync
254+
if authorizationsync.PrepareForUpdateClusterRoleBinding(convertedRoleBinding, rbacRoleBinding) {
255+
// we retry on this since it could be caused by the controller lagging behind
256+
return errOutOfSync
203257
}
204258

205-
return errlist
259+
return nil
206260
}
207261

208-
func (o *MigrateAuthorizationOptions) checkRoleBinding(originRoleBinding *authorizationapi.RoleBinding) []error {
209-
var errlist []error
210-
262+
func (o *MigrateAuthorizationOptions) checkRoleBinding(originRoleBinding *authorizationapi.RoleBinding) migrate.TemporaryError {
211263
// convert the origin role binding to a rbac role binding
212264
convertedRoleBinding, err := authorizationsync.ConvertToRBACRoleBinding(originRoleBinding)
213265
if err != nil {
214-
errlist = append(errlist, err)
266+
// conversion errors should basically never happen, so we do not attempt to retry on those
267+
return migrate.ErrNotRetriable{err}
215268
}
216269

217270
// try to get the equivalent rbac role binding from the api
218271
rbacRoleBinding, err := o.rbac.RoleBindings(originRoleBinding.Namespace).Get(originRoleBinding.Name, v1.GetOptions{})
219272
if err != nil {
220-
errlist = append(errlist, err)
273+
// it is possible that the controller has not synced this yet
274+
return handleRBACGetError(err)
221275
}
222276

223-
// compare the results if there have been no errors so far
224-
if len(errlist) == 0 {
225-
// if they are not equal, something has gone wrong and the two objects are not in sync
226-
if authorizationsync.PrepareForUpdateRoleBinding(convertedRoleBinding, rbacRoleBinding) {
227-
errlist = append(errlist, errOutOfSync)
228-
}
277+
// if they are not equal, something has gone wrong and the two objects are not in sync
278+
if authorizationsync.PrepareForUpdateRoleBinding(convertedRoleBinding, rbacRoleBinding) {
279+
// we retry on this since it could be caused by the controller lagging behind
280+
return errOutOfSync
229281
}
230282

231-
return errlist
283+
return nil
232284
}

pkg/cmd/admin/migrate/migrator.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -350,23 +350,28 @@ var ErrUnchanged = fmt.Errorf("migration was not necessary")
350350
// both status and spec must be changed).
351351
var ErrRecalculate = fmt.Errorf("recalculate migration")
352352

353+
// MigrateError is an exported alias to error to allow external packages to use ErrRetriable and ErrNotRetriable
354+
type MigrateError error
355+
353356
// ErrRetriable is a wrapper for an error that a migrator may use to indicate the
354357
// specific error can be retried.
355358
type ErrRetriable struct {
356-
error
359+
MigrateError
357360
}
358361

359362
func (ErrRetriable) Temporary() bool { return true }
360363

361364
// ErrNotRetriable is a wrapper for an error that a migrator may use to indicate the
362365
// specific error cannot be retried.
363366
type ErrNotRetriable struct {
364-
error
367+
MigrateError
365368
}
366369

367370
func (ErrNotRetriable) Temporary() bool { return false }
368371

369-
type temporary interface {
372+
// TemporaryError is a wrapper interface that is used to determine if an error can be retried.
373+
type TemporaryError interface {
374+
error
370375
// Temporary should return true if this is a temporary error
371376
Temporary() bool
372377
}
@@ -478,7 +483,7 @@ func (t *migrateTracker) try(info *resource.Info, retries int) (attemptResult, e
478483

479484
// canRetry returns true if the provided error indicates a retry is possible.
480485
func canRetry(err error) bool {
481-
if temp, ok := err.(temporary); ok && temp.Temporary() {
486+
if temp, ok := err.(TemporaryError); ok && temp.Temporary() {
482487
return true
483488
}
484489
return err == ErrRecalculate

pkg/cmd/cli/cmd/create/policy_binding.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,16 @@ func (o *CreatePolicyBindingOptions) Complete(cmd *cobra.Command, f *clientcmd.F
7171
}
7272
o.BindingNamespace = namespace
7373

74-
client, kclient, err := f.Clients()
74+
discovery, err := f.DiscoveryClient()
7575
if err != nil {
7676
return err
7777
}
7878

79-
if err := clientcmd.LegacyPolicyResourceGate(kclient.Discovery()); err != nil {
79+
if err := clientcmd.LegacyPolicyResourceGate(discovery); err != nil {
80+
return err
81+
}
82+
client, _, err := f.Clients()
83+
if err != nil {
8084
return err
8185
}
8286

pkg/cmd/util/clientcmd/gating.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import (
88
"k8s.io/apimachinery/pkg/runtime/schema"
99
"k8s.io/client-go/discovery"
1010

11-
"github.com/openshift/origin/pkg/authorization/apis/authorization"
11+
authorization "github.com/openshift/origin/pkg/authorization/apis/authorization/v1"
1212
)
1313

1414
// LegacyPolicyResourceGate returns err if the server does not support the set of legacy policy objects (< 3.7)

0 commit comments

Comments
 (0)