Skip to content

Commit 928f1cd

Browse files
committed
Basic high-availability for auto egress IPs
If a namespace has multiple egress IPs, monitor egress traffic and switch to an alternate egress IP if the currently-selected one appears dead.
1 parent f295bbe commit 928f1cd

File tree

4 files changed

+483
-29
lines changed

4 files changed

+483
-29
lines changed

pkg/network/node/egressip.go

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,16 @@ import (
1515
networkapi "github.com/openshift/origin/pkg/network/apis/network"
1616
"github.com/openshift/origin/pkg/network/common"
1717
networkinformers "github.com/openshift/origin/pkg/network/generated/informers/internalversion"
18+
"github.com/openshift/origin/pkg/util/netutils"
1819

1920
"github.com/vishvananda/netlink"
2021
)
2122

2223
type nodeEgress struct {
2324
nodeIP string
25+
sdnIP string
2426
requestedIPs sets.String
27+
offline bool
2528
}
2629

2730
type namespaceEgress struct {
@@ -48,6 +51,7 @@ type egressIPWatcher struct {
4851

4952
networkInformers networkinformers.SharedInformerFactory
5053
iptables *NodeIPTables
54+
vxlanMonitor *egressVXLANMonitor
5155

5256
nodesByNodeIP map[string]*nodeEgress
5357
namespacesByVNID map[uint32]*namespaceEgress
@@ -87,6 +91,10 @@ func (eip *egressIPWatcher) Start(networkInformers networkinformers.SharedInform
8791
eip.networkInformers = networkInformers
8892
eip.iptables = iptables
8993

94+
updates := make(chan *egressVXLANNode)
95+
eip.vxlanMonitor = newEgressVXLANMonitor(eip.oc.ovs, updates)
96+
go eip.watchVXLAN(updates)
97+
9098
eip.watchHostSubnets()
9199
eip.watchNetNamespaces()
92100
return nil
@@ -179,17 +187,23 @@ func (eip *egressIPWatcher) handleAddOrUpdateHostSubnet(obj, _ interface{}, even
179187
hs := obj.(*networkapi.HostSubnet)
180188
glog.V(5).Infof("Watch %s event for HostSubnet %q", eventType, hs.Name)
181189

182-
eip.updateNodeEgress(hs.HostIP, hs.EgressIPs)
190+
_, cidr, err := net.ParseCIDR(hs.Subnet)
191+
if err != nil {
192+
utilruntime.HandleError(fmt.Errorf("could not parse HostSubnet %q CIDR: %v", hs.Name, err))
193+
}
194+
sdnIP := netutils.GenerateDefaultGateway(cidr).String()
195+
196+
eip.updateNodeEgress(hs.HostIP, sdnIP, hs.EgressIPs)
183197
}
184198

185199
// handleDeleteHostSubnet is the handler for HostSubnet deletions. obj must be a
// *networkapi.HostSubnet; any other type makes the type assertion panic.
//
// It logs the event and then calls updateNodeEgress with an empty SDN IP and no
// egress IPs, which is how the watcher is told that the node no longer
// requests any egress IPs.
func (eip *egressIPWatcher) handleDeleteHostSubnet(obj interface{}) {
	deleted := obj.(*networkapi.HostSubnet)
	glog.V(5).Infof("Watch %s event for HostSubnet %q", watch.Deleted, deleted.Name)

	// A nil slice: the node is left with zero requested egress IPs.
	var noEgressIPs []string
	eip.updateNodeEgress(deleted.HostIP, "", noEgressIPs)
}
191205

192-
func (eip *egressIPWatcher) updateNodeEgress(nodeIP string, nodeEgressIPs []string) {
206+
func (eip *egressIPWatcher) updateNodeEgress(nodeIP, sdnIP string, nodeEgressIPs []string) {
193207
eip.Lock()
194208
defer eip.Unlock()
195209

@@ -200,11 +214,18 @@ func (eip *egressIPWatcher) updateNodeEgress(nodeIP string, nodeEgressIPs []stri
200214
}
201215
node = &nodeEgress{
202216
nodeIP: nodeIP,
217+
sdnIP: sdnIP,
203218
requestedIPs: sets.NewString(),
204219
}
205220
eip.nodesByNodeIP[nodeIP] = node
221+
if eip.vxlanMonitor != nil && node.nodeIP != eip.localIP {
222+
eip.vxlanMonitor.AddNode(node.nodeIP, node.sdnIP)
223+
}
206224
} else if len(nodeEgressIPs) == 0 {
207225
delete(eip.nodesByNodeIP, nodeIP)
226+
if eip.vxlanMonitor != nil {
227+
eip.vxlanMonitor.RemoveNode(node.nodeIP)
228+
}
208229
}
209230
oldRequestedIPs := node.requestedIPs
210231
node.requestedIPs = sets.NewString(nodeEgressIPs...)
@@ -350,6 +371,8 @@ func (eip *egressIPWatcher) syncEgressNamespaceState(ns *namespaceEgress) error
350371
if active == nil {
351372
if eg.assignedNodeIP == "" {
352373
glog.V(4).Infof("VNID %d cannot use unassigned egress IP %s", ns.vnid, eg.ip)
374+
} else if len(ns.requestedIPs) > 1 && eg.nodes[0].offline {
375+
glog.V(4).Infof("VNID %d cannot use egress IP %s on offline node %s", ns.vnid, eg.ip, eg.assignedNodeIP)
353376
} else {
354377
active = eg
355378
}
@@ -429,3 +452,29 @@ func (eip *egressIPWatcher) releaseEgressIP(egressIP, mark string) error {
429452

430453
return nil
431454
}
455+
456+
func (eip *egressIPWatcher) watchVXLAN(updates chan *egressVXLANNode) {
457+
for node := range updates {
458+
eip.updateNode(node.nodeIP, node.offline)
459+
}
460+
}
461+
462+
func (eip *egressIPWatcher) updateNode(nodeIP string, offline bool) {
463+
eip.Lock()
464+
defer eip.Unlock()
465+
466+
node := eip.nodesByNodeIP[nodeIP]
467+
if node == nil {
468+
eip.vxlanMonitor.RemoveNode(nodeIP)
469+
return
470+
}
471+
472+
node.offline = offline
473+
for _, ip := range node.requestedIPs.UnsortedList() {
474+
eg := eip.egressIPs[ip]
475+
if eg != nil {
476+
eip.egressIPChanged(eg)
477+
}
478+
}
479+
eip.syncEgressIPs()
480+
}

pkg/network/node/egressip_test.go

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,8 @@ func setupEgressIPWatcher(t *testing.T) (*egressIPWatcher, []string) {
142142
func TestEgressIP(t *testing.T) {
143143
eip, flows := setupEgressIPWatcher(t)
144144

145-
eip.updateNodeEgress("172.17.0.3", []string{})
146-
eip.updateNodeEgress("172.17.0.4", []string{})
145+
eip.updateNodeEgress("172.17.0.3", "", []string{})
146+
eip.updateNodeEgress("172.17.0.4", "", []string{})
147147
eip.deleteNamespaceEgress(42)
148148
eip.deleteNamespaceEgress(43)
149149

@@ -168,7 +168,7 @@ func TestEgressIP(t *testing.T) {
168168
t.Fatalf("%v", err)
169169
}
170170

171-
eip.updateNodeEgress("172.17.0.3", []string{"172.17.0.100"}) // Added .100
171+
eip.updateNodeEgress("172.17.0.3", "", []string{"172.17.0.100"}) // Added .100
172172
err = assertNoNetlinkChanges(eip)
173173
if err != nil {
174174
t.Fatalf("%v", err)
@@ -179,8 +179,8 @@ func TestEgressIP(t *testing.T) {
179179
}
180180

181181
// Assign HostSubnet.EgressIP first, then NetNamespace.EgressIP, with a remote EgressIP
182-
eip.updateNodeEgress("172.17.0.3", []string{"172.17.0.101", "172.17.0.100"}) // Added .101
183-
eip.updateNodeEgress("172.17.0.5", []string{"172.17.0.105"}) // Added .105
182+
eip.updateNodeEgress("172.17.0.3", "", []string{"172.17.0.101", "172.17.0.100"}) // Added .101
183+
eip.updateNodeEgress("172.17.0.5", "", []string{"172.17.0.105"}) // Added .105
184184
err = assertNoNetlinkChanges(eip)
185185
if err != nil {
186186
t.Fatalf("%v", err)
@@ -222,7 +222,7 @@ func TestEgressIP(t *testing.T) {
222222
t.Fatalf("%v", err)
223223
}
224224

225-
eip.updateNodeEgress("172.17.0.4", []string{"172.17.0.102", "172.17.0.104"}) // Added .102, .104
225+
eip.updateNodeEgress("172.17.0.4", "", []string{"172.17.0.102", "172.17.0.104"}) // Added .102, .104
226226
err = assertNetlinkChange(eip, "claim 172.17.0.104")
227227
if err != nil {
228228
t.Fatalf("%v", err)
@@ -245,7 +245,7 @@ func TestEgressIP(t *testing.T) {
245245
}
246246

247247
// Assign HostSubnet.EgressIP first, then NetNamespace.EgressIP, with a local EgressIP
248-
eip.updateNodeEgress("172.17.0.4", []string{"172.17.0.102", "172.17.0.103"}) // Added .103, Dropped .104
248+
eip.updateNodeEgress("172.17.0.4", "", []string{"172.17.0.102", "172.17.0.103"}) // Added .103, Dropped .104
249249
err = assertNoNetlinkChanges(eip)
250250
if err != nil {
251251
t.Fatalf("%v", err)
@@ -288,7 +288,7 @@ func TestEgressIP(t *testing.T) {
288288
}
289289

290290
// Drop remote node EgressIP
291-
eip.updateNodeEgress("172.17.0.3", []string{"172.17.0.100"}) // Dropped .101
291+
eip.updateNodeEgress("172.17.0.3", "", []string{"172.17.0.100"}) // Dropped .101
292292
err = assertNoNetlinkChanges(eip)
293293
if err != nil {
294294
t.Fatalf("%v", err)
@@ -299,7 +299,7 @@ func TestEgressIP(t *testing.T) {
299299
}
300300

301301
// Drop local node EgressIP
302-
eip.updateNodeEgress("172.17.0.4", []string{"172.17.0.102"}) // Dropped .103
302+
eip.updateNodeEgress("172.17.0.4", "", []string{"172.17.0.102"}) // Dropped .103
303303
err = assertNetlinkChange(eip, "release 172.17.0.103")
304304
if err != nil {
305305
t.Fatalf("%v", err)
@@ -310,7 +310,7 @@ func TestEgressIP(t *testing.T) {
310310
}
311311

312312
// Add them back, swapped
313-
eip.updateNodeEgress("172.17.0.3", []string{"172.17.0.100", "172.17.0.103"}) // Added .103
313+
eip.updateNodeEgress("172.17.0.3", "", []string{"172.17.0.100", "172.17.0.103"}) // Added .103
314314
err = assertNoNetlinkChanges(eip)
315315
if err != nil {
316316
t.Fatalf("%v", err)
@@ -320,7 +320,7 @@ func TestEgressIP(t *testing.T) {
320320
t.Fatalf("%v", err)
321321
}
322322

323-
eip.updateNodeEgress("172.17.0.4", []string{"172.17.0.101", "172.17.0.102"}) // Added .101
323+
eip.updateNodeEgress("172.17.0.4", "", []string{"172.17.0.101", "172.17.0.102"}) // Added .101
324324
err = assertNetlinkChange(eip, "claim 172.17.0.101")
325325
if err != nil {
326326
t.Fatalf("%v", err)
@@ -335,7 +335,7 @@ func TestMultipleNamespaceEgressIPs(t *testing.T) {
335335
eip, flows := setupEgressIPWatcher(t)
336336

337337
eip.updateNamespaceEgress(42, []string{"172.17.0.100"})
338-
eip.updateNodeEgress("172.17.0.3", []string{"172.17.0.100"})
338+
eip.updateNodeEgress("172.17.0.3", "", []string{"172.17.0.100"})
339339
err := assertOVSChanges(eip, &flows,
340340
egressOVSChange{vnid: 42, egress: Remote, remote: "172.17.0.3"},
341341
)
@@ -351,7 +351,7 @@ func TestMultipleNamespaceEgressIPs(t *testing.T) {
351351
}
352352

353353
// Now assigning that IP to a node should switch OVS to use that since it's first in the list
354-
eip.updateNodeEgress("172.17.0.4", []string{"172.17.0.101"})
354+
eip.updateNodeEgress("172.17.0.4", "", []string{"172.17.0.101"})
355355
err = assertOVSChanges(eip, &flows,
356356
egressOVSChange{vnid: 42, egress: Local},
357357
)
@@ -369,7 +369,7 @@ func TestMultipleNamespaceEgressIPs(t *testing.T) {
369369
}
370370

371371
// Removing the original egress IP from its node should leave us with one working IP
372-
eip.updateNodeEgress("172.17.0.3", nil)
372+
eip.updateNodeEgress("172.17.0.3", "", nil)
373373
err = assertOVSChanges(eip, &flows,
374374
egressOVSChange{vnid: 42, egress: Local},
375375
)
@@ -378,7 +378,7 @@ func TestMultipleNamespaceEgressIPs(t *testing.T) {
378378
}
379379

380380
// Removing the remaining egress IP should now kill the namespace
381-
eip.updateNodeEgress("172.17.0.4", []string{"172.17.0.200"})
381+
eip.updateNodeEgress("172.17.0.4", "", []string{"172.17.0.200"})
382382
err = assertOVSChanges(eip, &flows,
383383
egressOVSChange{vnid: 42, egress: Dropped},
384384
)
@@ -387,8 +387,8 @@ func TestMultipleNamespaceEgressIPs(t *testing.T) {
387387
}
388388

389389
// Now add the egress IPs back...
390-
eip.updateNodeEgress("172.17.0.3", []string{"172.17.0.100"})
391-
eip.updateNodeEgress("172.17.0.4", []string{"172.17.0.101"})
390+
eip.updateNodeEgress("172.17.0.3", "", []string{"172.17.0.100"})
391+
eip.updateNodeEgress("172.17.0.4", "", []string{"172.17.0.101"})
392392
err = assertOVSChanges(eip, &flows,
393393
egressOVSChange{vnid: 42, egress: Remote, remote: "172.17.0.3"},
394394
)
@@ -439,7 +439,7 @@ func TestNodeIPAsEgressIP(t *testing.T) {
439439
eip, flows := setupEgressIPWatcher(t)
440440

441441
// Trying to assign node IP as egress IP should fail. (It will log an error but this test doesn't notice that.)
442-
eip.updateNodeEgress("172.17.0.4", []string{"172.17.0.4", "172.17.0.102"})
442+
eip.updateNodeEgress("172.17.0.4", "", []string{"172.17.0.4", "172.17.0.102"})
443443
err := assertNoNetlinkChanges(eip)
444444
if err != nil {
445445
t.Fatalf("%v", err)
@@ -454,7 +454,7 @@ func TestDuplicateNodeEgressIPs(t *testing.T) {
454454
eip, flows := setupEgressIPWatcher(t)
455455

456456
eip.updateNamespaceEgress(42, []string{"172.17.0.100"})
457-
eip.updateNodeEgress("172.17.0.3", []string{"172.17.0.100"})
457+
eip.updateNodeEgress("172.17.0.3", "", []string{"172.17.0.100"})
458458
err := assertOVSChanges(eip, &flows, egressOVSChange{vnid: 42, egress: Remote, remote: "172.17.0.3"})
459459
if err != nil {
460460
t.Fatalf("%v", err)
@@ -463,7 +463,7 @@ func TestDuplicateNodeEgressIPs(t *testing.T) {
463463
// Adding the Egress IP to another node should not work and should cause the
464464
// namespace to start dropping traffic. (And in particular, even though we're
465465
// adding the Egress IP to the local node, there should not be a netlink change.)
466-
eip.updateNodeEgress("172.17.0.4", []string{"172.17.0.100"})
466+
eip.updateNodeEgress("172.17.0.4", "", []string{"172.17.0.100"})
467467
err = assertNoNetlinkChanges(eip)
468468
if err != nil {
469469
t.Fatalf("%v", err)
@@ -474,7 +474,7 @@ func TestDuplicateNodeEgressIPs(t *testing.T) {
474474
}
475475

476476
// Removing the duplicate node egressIP should restore traffic to the broken namespace
477-
eip.updateNodeEgress("172.17.0.4", []string{})
477+
eip.updateNodeEgress("172.17.0.4", "", []string{})
478478
err = assertNoNetlinkChanges(eip)
479479
if err != nil {
480480
t.Fatalf("%v", err)
@@ -485,7 +485,7 @@ func TestDuplicateNodeEgressIPs(t *testing.T) {
485485
}
486486

487487
// As above, but with a remote node IP
488-
eip.updateNodeEgress("172.17.0.5", []string{"172.17.0.100"})
488+
eip.updateNodeEgress("172.17.0.5", "", []string{"172.17.0.100"})
489489
err = assertOVSChanges(eip, &flows, egressOVSChange{vnid: 42, egress: Dropped})
490490
if err != nil {
491491
t.Fatalf("%v", err)
@@ -507,7 +507,7 @@ func TestDuplicateNodeEgressIPs(t *testing.T) {
507507

508508
// Removing the original egress node should result in the "duplicate" egress node
509509
// now being used.
510-
eip.updateNodeEgress("172.17.0.3", []string{})
510+
eip.updateNodeEgress("172.17.0.3", "", []string{})
511511
err = assertOVSChanges(eip, &flows, egressOVSChange{vnid: 42, egress: Remote, remote: "172.17.0.5"})
512512
if err != nil {
513513
t.Fatalf("%v", err)
@@ -518,7 +518,7 @@ func TestDuplicateNamespaceEgressIPs(t *testing.T) {
518518
eip, flows := setupEgressIPWatcher(t)
519519

520520
eip.updateNamespaceEgress(42, []string{"172.17.0.100"})
521-
eip.updateNodeEgress("172.17.0.3", []string{"172.17.0.100"})
521+
eip.updateNodeEgress("172.17.0.3", "", []string{"172.17.0.100"})
522522
err := assertOVSChanges(eip, &flows, egressOVSChange{vnid: 42, egress: Remote, remote: "172.17.0.3"})
523523
if err != nil {
524524
t.Fatalf("%v", err)
@@ -560,7 +560,7 @@ func TestDuplicateNamespaceEgressIPs(t *testing.T) {
560560
// cause the rules to get deleted and then added back in the opposite order,
561561
// which assertNoOVSChanges() would complain about, so we have to use
562562
// assertOVSChanges() instead.
563-
eip.updateNodeEgress("172.17.0.3", []string{})
563+
eip.updateNodeEgress("172.17.0.3", "", []string{})
564564
err = assertOVSChanges(eip, &flows,
565565
egressOVSChange{vnid: 42, egress: Dropped},
566566
egressOVSChange{vnid: 43, egress: Dropped},
@@ -569,7 +569,7 @@ func TestDuplicateNamespaceEgressIPs(t *testing.T) {
569569
t.Fatalf("%v", err)
570570
}
571571

572-
eip.updateNodeEgress("172.17.0.3", []string{"172.17.0.100"})
572+
eip.updateNodeEgress("172.17.0.3", "", []string{"172.17.0.100"})
573573
err = assertOVSChanges(eip, &flows,
574574
egressOVSChange{vnid: 42, egress: Dropped},
575575
egressOVSChange{vnid: 43, egress: Dropped},

0 commit comments

Comments
 (0)