Skip to content

Commit 859b7fe

Browse files
Merge pull request #18322 from smarterclayton/bootstrap
Automatic merge from submit-queue. Write Kubelet flags as an option on openshift start node to support moving directly to kubelet Instead of having openshift start node bootstrap, prepare to move to directly invoking the kubelet by having a flag on `openshift start node` called `--write-flags` that generates the arguments to invoke the kubelet for a given `--config`. Instead of calling `openshift start node` to do bootstrapping, we'd instead invoke the --write-flags path and call the kubelet directly. The generated node-config on the system would have the correct bootstrap settings. Would require us to move to dynamic config in the kubelet or to add a secondary loop to pull down the latest kube-config. That's probably acceptable. Also contains a set of changes that allow certificate rotation to happen completely in the background, rather than blocking the kubelet startup. This allows us to keep bootstrapping the node from the master, but to still launch static pods in the bacgkround (right now we can't launch static pods while bootstrapping because bootstrapping is happening *before* the kubelet pod sync loop runs). In this model, master containers as static pods will not require any node changes to make work (so master nodes wouldn't be different from other nodes). I'm going to clean this up and propose upstream. Note that this path would *not* require --runonce mode, which is very good because it's effectively unsupported. @deads2k we're block on static pod for kubelet until we sort out the path forward. I don't want to have two separate types of node config, and I think this is probably the best position in the long run (all nodes bootstrap and have static pod config, nodes background loop waiting for bootstrapping and reject requests that require client/server connections until bootstrapping completes). Origin-commit: a5d2ee081b9430a35e1a083b9077a0639d2aa10e Kubernetes-commit: e6c9e9036872fde82dea33f209ff8a33b44ea8d9
2 parents f6608fb + f2eca55 commit 859b7fe

File tree

2 files changed

+40
-101
lines changed

2 files changed

+40
-101
lines changed

util/certificate/certificate_manager.go

Lines changed: 32 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,6 @@ type manager struct {
141141
certStore Store
142142
certAccessLock sync.RWMutex
143143
cert *tls.Certificate
144-
rotationDeadline time.Time
145144
forceRotation bool
146145
certificateExpiration Gauge
147146
serverHealth bool
@@ -208,36 +207,26 @@ func (m *manager) SetCertificateSigningRequestClient(certSigningRequestClient ce
208207
func (m *manager) Start() {
209208
// Certificate rotation depends on access to the API server certificate
210209
// signing API, so don't start the certificate manager if we don't have a
211-
// client. This will happen on the cluster master, where the kubelet is
212-
// responsible for bootstrapping the pods of the master components.
210+
// client.
213211
if m.certSigningRequestClient == nil {
214212
glog.V(2).Infof("Certificate rotation is not enabled, no connection to the apiserver.")
215213
return
216214
}
217215

218216
glog.V(2).Infof("Certificate rotation is enabled.")
219217

220-
m.setRotationDeadline()
221-
222-
// Synchronously request a certificate before entering the background
223-
// loop to allow bootstrap scenarios, where the certificate manager
224-
// doesn't have a certificate at all yet.
225-
if m.shouldRotate() {
226-
glog.V(1).Infof("shouldRotate() is true, forcing immediate rotation")
227-
if _, err := m.rotateCerts(); err != nil {
228-
utilruntime.HandleError(fmt.Errorf("Could not rotate certificates: %v", err))
229-
}
230-
}
231-
backoff := wait.Backoff{
232-
Duration: 2 * time.Second,
233-
Factor: 2,
234-
Jitter: 0.1,
235-
Steps: 5,
236-
}
237218
go wait.Forever(func() {
238-
sleepInterval := m.rotationDeadline.Sub(time.Now())
239-
glog.V(2).Infof("Waiting %v for next certificate rotation", sleepInterval)
240-
time.Sleep(sleepInterval)
219+
deadline := m.nextRotationDeadline()
220+
if sleepInterval := deadline.Sub(time.Now()); sleepInterval > 0 {
221+
glog.V(2).Infof("Waiting %v for next certificate rotation", sleepInterval)
222+
time.Sleep(sleepInterval)
223+
}
224+
backoff := wait.Backoff{
225+
Duration: 2 * time.Second,
226+
Factor: 2,
227+
Jitter: 0.1,
228+
Steps: 5,
229+
}
241230
if err := wait.ExponentialBackoff(backoff, m.rotateCerts); err != nil {
242231
utilruntime.HandleError(fmt.Errorf("Reached backoff limit, still unable to rotate certs: %v", err))
243232
wait.PollInfinite(32*time.Second, m.rotateCerts)
@@ -252,11 +241,14 @@ func getCurrentCertificateOrBootstrap(
252241

253242
currentCert, err := store.Current()
254243
if err == nil {
255-
return currentCert, false, nil
256-
}
257-
258-
if _, ok := err.(*NoCertKeyError); !ok {
259-
return nil, false, err
244+
// if the current cert is expired, fall back to the bootstrap cert
245+
if currentCert.Leaf != nil && time.Now().Before(currentCert.Leaf.NotAfter) {
246+
return currentCert, false, nil
247+
}
248+
} else {
249+
if _, ok := err.(*NoCertKeyError); !ok {
250+
return nil, false, err
251+
}
260252
}
261253

262254
if bootstrapCertificatePEM == nil || bootstrapKeyPEM == nil {
@@ -279,20 +271,6 @@ func getCurrentCertificateOrBootstrap(
279271
return &bootstrapCert, true, nil
280272
}
281273

282-
// shouldRotate looks at how close the current certificate is to expiring and
283-
// decides if it is time to rotate or not.
284-
func (m *manager) shouldRotate() bool {
285-
m.certAccessLock.RLock()
286-
defer m.certAccessLock.RUnlock()
287-
if m.cert == nil {
288-
return true
289-
}
290-
if m.forceRotation {
291-
return true
292-
}
293-
return time.Now().After(m.rotationDeadline)
294-
}
295-
296274
// rotateCerts attempts to request a client cert from the server, wait a reasonable
297275
// period of time for it to be signed, and then update the cert on disk. If it cannot
298276
// retrieve a cert, it will return false. It will only return error in exceptional cases.
@@ -349,30 +327,34 @@ func (m *manager) rotateCerts() (bool, error) {
349327
}
350328

351329
m.updateCached(cert)
352-
m.setRotationDeadline()
353-
m.forceRotation = false
354330
return true, nil
355331
}
356332

357-
// setRotationDeadline sets a cached value for the threshold at which the
333+
// nextRotationDeadline returns a value for the threshold at which the
358334
// current certificate should be rotated, 80%+/-10% of the expiration of the
359335
// certificate.
360-
func (m *manager) setRotationDeadline() {
336+
func (m *manager) nextRotationDeadline() time.Time {
337+
// forceRotation is not protected by locks
338+
if m.forceRotation {
339+
m.forceRotation = false
340+
return time.Now()
341+
}
342+
361343
m.certAccessLock.RLock()
362344
defer m.certAccessLock.RUnlock()
363345
if m.cert == nil {
364-
m.rotationDeadline = time.Now()
365-
return
346+
return time.Now()
366347
}
367348

368349
notAfter := m.cert.Leaf.NotAfter
369350
totalDuration := float64(notAfter.Sub(m.cert.Leaf.NotBefore))
351+
deadline := m.cert.Leaf.NotBefore.Add(jitteryDuration(totalDuration))
370352

371-
m.rotationDeadline = m.cert.Leaf.NotBefore.Add(jitteryDuration(totalDuration))
372-
glog.V(2).Infof("Certificate expiration is %v, rotation deadline is %v", notAfter, m.rotationDeadline)
353+
glog.V(2).Infof("Certificate expiration is %v, rotation deadline is %v", notAfter, deadline)
373354
if m.certificateExpiration != nil {
374355
m.certificateExpiration.Set(float64(notAfter.Unix()))
375356
}
357+
return deadline
376358
}
377359

378360
// jitteryDuration uses some jitter to set the rotation threshold so each node

util/certificate/certificate_manager_test.go

Lines changed: 8 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -146,46 +146,6 @@ func TestNewManagerNoRotation(t *testing.T) {
146146
}
147147
}
148148

149-
func TestShouldRotate(t *testing.T) {
150-
now := time.Now()
151-
tests := []struct {
152-
name string
153-
notBefore time.Time
154-
notAfter time.Time
155-
shouldRotate bool
156-
}{
157-
{"just issued, still good", now.Add(-1 * time.Hour), now.Add(99 * time.Hour), false},
158-
{"half way expired, still good", now.Add(-24 * time.Hour), now.Add(24 * time.Hour), false},
159-
{"mostly expired, still good", now.Add(-69 * time.Hour), now.Add(31 * time.Hour), false},
160-
{"just about expired, should rotate", now.Add(-91 * time.Hour), now.Add(9 * time.Hour), true},
161-
{"nearly expired, should rotate", now.Add(-99 * time.Hour), now.Add(1 * time.Hour), true},
162-
{"already expired, should rotate", now.Add(-10 * time.Hour), now.Add(-1 * time.Hour), true},
163-
}
164-
165-
for _, test := range tests {
166-
t.Run(test.name, func(t *testing.T) {
167-
m := manager{
168-
cert: &tls.Certificate{
169-
Leaf: &x509.Certificate{
170-
NotBefore: test.notBefore,
171-
NotAfter: test.notAfter,
172-
},
173-
},
174-
template: &x509.CertificateRequest{},
175-
usages: []certificates.KeyUsage{},
176-
}
177-
m.setRotationDeadline()
178-
if m.shouldRotate() != test.shouldRotate {
179-
t.Errorf("Time %v, a certificate issued for (%v, %v) should rotate should be %t.",
180-
now,
181-
m.cert.Leaf.NotBefore,
182-
m.cert.Leaf.NotAfter,
183-
test.shouldRotate)
184-
}
185-
})
186-
}
187-
}
188-
189149
type gaugeMock struct {
190150
calls int
191151
lastValue float64
@@ -233,13 +193,13 @@ func TestSetRotationDeadline(t *testing.T) {
233193
jitteryDuration = func(float64) time.Duration { return time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7) }
234194
lowerBound := tc.notBefore.Add(time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7))
235195

236-
m.setRotationDeadline()
196+
deadline := m.nextRotationDeadline()
237197

238-
if !m.rotationDeadline.Equal(lowerBound) {
198+
if !deadline.Equal(lowerBound) {
239199
t.Errorf("For notBefore %v, notAfter %v, the rotationDeadline %v should be %v.",
240200
tc.notBefore,
241201
tc.notAfter,
242-
m.rotationDeadline,
202+
deadline,
243203
lowerBound)
244204
}
245205
if g.calls != 1 {
@@ -321,7 +281,7 @@ func TestNewManagerBootstrap(t *testing.T) {
321281
}
322282
if m, ok := cm.(*manager); !ok {
323283
t.Errorf("Expected a '*manager' from 'NewManager'")
324-
} else if !m.shouldRotate() {
284+
} else if !m.forceRotation {
325285
t.Errorf("Expected rotation should happen during bootstrap, but it won't.")
326286
}
327287
}
@@ -360,9 +320,8 @@ func TestNewManagerNoBootstrap(t *testing.T) {
360320
if m, ok := cm.(*manager); !ok {
361321
t.Errorf("Expected a '*manager' from 'NewManager'")
362322
} else {
363-
m.setRotationDeadline()
364-
if m.shouldRotate() {
365-
t.Errorf("Expected rotation should happen during bootstrap, but it won't.")
323+
if m.forceRotation {
324+
t.Errorf("Expected rotation should not happen during bootstrap, but it won't.")
366325
}
367326
}
368327
}
@@ -515,8 +474,7 @@ func TestInitializeCertificateSigningRequestClient(t *testing.T) {
515474
if m, ok := certificateManager.(*manager); !ok {
516475
t.Errorf("Expected a '*manager' from 'NewManager'")
517476
} else {
518-
m.setRotationDeadline()
519-
if m.shouldRotate() {
477+
if m.forceRotation {
520478
if success, err := m.rotateCerts(); !success {
521479
t.Errorf("Got failure from 'rotateCerts', wanted success.")
522480
} else if err != nil {
@@ -614,8 +572,7 @@ func TestInitializeOtherRESTClients(t *testing.T) {
614572
if m, ok := certificateManager.(*manager); !ok {
615573
t.Errorf("Expected a '*manager' from 'NewManager'")
616574
} else {
617-
m.setRotationDeadline()
618-
if m.shouldRotate() {
575+
if m.forceRotation {
619576
success, err := certificateManager.(*manager).rotateCerts()
620577
if err != nil {
621578
t.Errorf("Got error %v, expected none.", err)

0 commit comments

Comments
 (0)