Skip to content

Commit 0e4c94a

Browse files
Merge pull request #18587 from ramr/fix-reload-cert-deleted
Automatic merge from submit-queue (batch tested with PRs 18587, 18296, 18667, 18665, 18532). Fix router reload errors for deleted certificates When a router is reloaded after a batch of route/ingress changes are committed, haproxy sometimes fail to reload. This can happen if a new request to delete a route (and so delete the associated certificates) is processed when the haproxy router is reloading. The router does recover on subsequent reloads when the config changes are actually processed. See associated error log: https://gist.github.com/ramr/122d70591d1fb8f97820869f7ca5550f The change here defers the deletes till commit time and ensures we only delete the certificates for the routes that are being processed as part of the current batchset. To recreate: Just create and delete a number of routes in a loop ala: ``` function create_routes() { local n=${1:-$NROUTES} for i in `seq $n`; do echo " - Creating route header-test-route-$i ... " sed "s/%1/$i/g" route.json.template | oc create -f - done } function delete_routes() { local n=${1:-$NROUTES} for i in `seq $n`; do echo " - Deleting route header-test-route-$i ... " oc delete route header-test-route-$i # [ $((i%50)) == 0 ] && sleep 2 done } ``` where `route.json.template` is just a route with the `metadata.name`, `id` and `spec.host` fields containing some text (`%1`) that gets substituted. @knobunc @rajatchopra PTAL Thx
2 parents f1c64a4 + 456bb4b commit 0e4c94a

File tree

4 files changed

+76
-10
lines changed

4 files changed

+76
-10
lines changed

pkg/router/template/certmanager.go

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,24 @@ import (
1212
routeapi "github.com/openshift/origin/pkg/route/apis/route"
1313
)
1414

15+
// certificateFile represents a certificate file.
16+
type certificateFile struct {
17+
certDir string
18+
id string
19+
}
20+
21+
// Tag generates a certificate file tag/name. This is used to index into the
22+
// the map of deleted certificates.
23+
func (cf certificateFile) Tag() string {
24+
return filepath.Join(cf.certDir, cf.id+".pem")
25+
}
26+
1527
// simpleCertificateManager is the default implementation of a certificateManager
1628
type simpleCertificateManager struct {
1729
cfg *certificateManagerConfig
1830
w certificateWriter
31+
32+
deletedCertificates map[string]certificateFile
1933
}
2034

2135
// newSimpleCertificateManager should be used to create a new cert manager. It will return an error
@@ -27,7 +41,7 @@ func newSimpleCertificateManager(cfg *certificateManagerConfig, w certificateWri
2741
if w == nil {
2842
return nil, fmt.Errorf("certificate manager requires a certificate writer")
2943
}
30-
return &simpleCertificateManager{cfg, w}, nil
44+
return &simpleCertificateManager{cfg, w, make(map[string]certificateFile, 0)}, nil
3145
}
3246

3347
// validateCertManagerConfig ensures that the key functions and directories are set as well as
@@ -81,6 +95,8 @@ func (cm *simpleCertificateManager) WriteCertificatesForConfig(config *ServiceAl
8195
buffer.Write([]byte(caCertObj.Contents))
8296
}
8397

98+
certFile := certificateFile{certDir: cm.cfg.certDir, id: certObj.ID}
99+
delete(cm.deletedCertificates, certFile.Tag())
84100
if err := cm.w.WriteCertificate(cm.cfg.certDir, certObj.ID, buffer.Bytes()); err != nil {
85101
return err
86102
}
@@ -92,6 +108,8 @@ func (cm *simpleCertificateManager) WriteCertificatesForConfig(config *ServiceAl
92108
destCert, ok := config.Certificates[destCertKey]
93109

94110
if ok {
111+
destCertFile := certificateFile{certDir: cm.cfg.caCertDir, id: destCert.ID}
112+
delete(cm.deletedCertificates, destCertFile.Tag())
95113
if err := cm.w.WriteCertificate(cm.cfg.caCertDir, destCert.ID, []byte(destCert.Contents)); err != nil {
96114
return err
97115
}
@@ -101,7 +119,7 @@ func (cm *simpleCertificateManager) WriteCertificatesForConfig(config *ServiceAl
101119
return nil
102120
}
103121

104-
// deleteCertificatesForConfig will delete all certificates for the ServiceAliasConfig
122+
// DeleteCertificatesForConfig will delete all certificates for the ServiceAliasConfig
105123
func (cm *simpleCertificateManager) DeleteCertificatesForConfig(config *ServiceAliasConfig) error {
106124
if config == nil {
107125
return nil
@@ -112,10 +130,8 @@ func (cm *simpleCertificateManager) DeleteCertificatesForConfig(config *ServiceA
112130
certObj, ok := config.Certificates[certKey]
113131

114132
if ok {
115-
err := cm.w.DeleteCertificate(cm.cfg.certDir, certObj.ID)
116-
if err != nil {
117-
return err
118-
}
133+
certFile := certificateFile{certDir: cm.cfg.certDir, id: certObj.ID}
134+
cm.deletedCertificates[certFile.Tag()] = certFile
119135
}
120136
}
121137

@@ -124,16 +140,38 @@ func (cm *simpleCertificateManager) DeleteCertificatesForConfig(config *ServiceA
124140
destCert, ok := config.Certificates[destCertKey]
125141

126142
if ok {
127-
err := cm.w.DeleteCertificate(cm.cfg.caCertDir, destCert.ID)
128-
if err != nil {
129-
return err
130-
}
143+
destCertFile := certificateFile{certDir: cm.cfg.caCertDir, id: destCert.ID}
144+
cm.deletedCertificates[destCertFile.Tag()] = destCertFile
131145
}
132146
}
133147
}
134148
return nil
135149
}
136150

151+
// Commit applies any pending changes made to the certificateManager.
152+
func (cm *simpleCertificateManager) Commit() error {
153+
// Deletion of certificates that are being referenced in backends or
154+
// config is problematic in that the template router will not
155+
// reload because the config is invalid, so we _do_ need to "stage"
156+
// or commit the removals. Remove all the deleted certificates.
157+
for _, certFile := range cm.deletedCertificates {
158+
err := cm.w.DeleteCertificate(certFile.certDir, certFile.id)
159+
if err != nil {
160+
// Log a warning if the delete fails but proceed on.
161+
glog.Warningf("Ignoring error deleting certificate file %v: %v", certFile.Tag(), err)
162+
}
163+
}
164+
165+
cm.deletedCertificates = make(map[string]certificateFile, 0)
166+
167+
// If we decide to stage the certificate writes, we can flush the
168+
// write to the disk here. Today, the certificate writes are done
169+
// just before this function is called. The tradeoff is storing a
170+
// copy in memory until we commit.
171+
172+
return nil
173+
}
174+
137175
// simpleCertificateWriter is the default implementation of a certificateWriter
138176
type simpleCertificateWriter struct{}
139177

pkg/router/template/certmanager_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package templaterouter
22

33
import (
44
"reflect"
5+
"sort"
56
"testing"
67

78
routeapi "github.com/openshift/origin/pkg/route/apis/route"
@@ -119,6 +120,7 @@ func TestCertManager(t *testing.T) {
119120
}
120121

121122
for k, tc := range testCases {
123+
certManager.Commit()
122124
fakeCertWriter.clear()
123125
err := certManager.WriteCertificatesForConfig(tc.cfg)
124126
if err != nil {
@@ -133,14 +135,31 @@ func TestCertManager(t *testing.T) {
133135
t.Errorf("Unexpected number of adds for %s occurred. Expected: %d Got: %d", k, len(tc.expectedAdds), len(fakeCertWriter.addedCerts))
134136
}
135137

138+
if 0 != len(fakeCertWriter.deletedCerts) {
139+
t.Errorf("Unexpected number of deletes prior to certManager.Commit() for %s occurred. Expected: 0 Got: %d", k, len(fakeCertWriter.deletedCerts))
140+
}
141+
142+
err = certManager.Commit()
143+
if err != nil {
144+
t.Fatalf("Unexpected error committing certs for service alias config for %s. Config: %v, err: %v", k, tc.cfg, err)
145+
}
146+
147+
if len(tc.expectedAdds) != len(fakeCertWriter.addedCerts) {
148+
t.Errorf("Unexpected number of adds for %s occurred. Expected: %d Got: %d", k, len(tc.expectedAdds), len(fakeCertWriter.addedCerts))
149+
}
150+
136151
if len(tc.expectedDeletes) != len(fakeCertWriter.deletedCerts) {
137152
t.Errorf("Unexpected number of deletes for %s occurred. Expected: %d Got: %d", k, len(tc.expectedDeletes), len(fakeCertWriter.deletedCerts))
138153
}
139154

155+
sort.Strings(tc.expectedAdds)
156+
sort.Strings(fakeCertWriter.addedCerts)
140157
if !reflect.DeepEqual(tc.expectedAdds, fakeCertWriter.addedCerts) {
141158
t.Errorf("Unexpected adds for %s, wanted: %v, got %v", k, tc.expectedAdds, fakeCertWriter.addedCerts)
142159
}
143160

161+
sort.Strings(tc.expectedDeletes)
162+
sort.Strings(fakeCertWriter.deletedCerts)
144163
if !reflect.DeepEqual(tc.expectedDeletes, fakeCertWriter.deletedCerts) {
145164
t.Errorf("Unexpected deletes for %s, wanted: %v, got %v", k, tc.expectedDeletes, fakeCertWriter.deletedCerts)
146165
}

pkg/router/template/router.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,13 @@ func (r *templateRouter) writeConfig() error {
394394
r.state[k] = cfg
395395
}
396396

397+
glog.V(4).Infof("Committing router certificate manager changes...")
398+
if err := r.certManager.Commit(); err != nil {
399+
return fmt.Errorf("error committing certificate changes: %v", err)
400+
}
401+
402+
glog.V(4).Infof("Router certificate manager config committed")
403+
397404
for path, template := range r.templates {
398405
file, err := os.Create(path)
399406
if err != nil {

pkg/router/template/types.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ type certificateManager interface {
103103
WriteCertificatesForConfig(config *ServiceAliasConfig) error
104104
// DeleteCertificatesForConfig deletes all certificates for all ServiceAliasConfigs in config
105105
DeleteCertificatesForConfig(config *ServiceAliasConfig) error
106+
// Commit commits all the changes made to the certificateManager.
107+
Commit() error
106108
// CertificateWriter provides direct access to the underlying writer if required
107109
CertificateWriter() certificateWriter
108110
}

0 commit comments

Comments
 (0)