diff --git a/brontide/bench_test.go b/brontide/bench_test.go
index 214d61f1fca..0605c54f5cc 100644
--- a/brontide/bench_test.go
+++ b/brontide/bench_test.go
@@ -2,6 +2,7 @@ package brontide
 
 import (
 	"bytes"
+	"io"
 	"math"
 	"math/rand"
 	"testing"
@@ -63,3 +64,47 @@ func BenchmarkReadHeaderAndBody(t *testing.B) {
 	}
 	require.NoError(t, benchErr)
 }
+
+// BenchmarkWriteMessage benchmarks the performance of writing a maximum-sized
+// message and flushing it to io.Discard to measure the allocation and CPU
+// overhead of the encryption and writing logic.
+func BenchmarkWriteMessage(b *testing.B) {
+	localConn, remoteConn, err := establishTestConnection(b)
+	require.NoError(b, err, "unable to establish test connection: %v", err)
+
+	noiseLocalConn, ok := localConn.(*Conn)
+	require.True(b, ok, "expected *Conn type for localConn")
+
+	// Create the largest possible message we can write (MaxUint16 bytes).
+	// This is the maximum message size allowed by the protocol.
+	const maxMsgSize = math.MaxUint16
+	largeMsg := bytes.Repeat([]byte("a"), maxMsgSize)
+
+	// Use io.Discard to simulate writing to a network connection that
+	// continuously accepts data without needing resets.
+	discard := io.Discard
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		// Write our massive message, then call flush to actually write
+		// the encrypted message. This simulates a full write operation
+		// to a network.
+		err := noiseLocalConn.noise.WriteMessage(largeMsg)
+		if err != nil {
+			b.Fatalf("WriteMessage failed: %v", err)
+		}
+		_, err = noiseLocalConn.noise.Flush(discard)
+		if err != nil {
+			b.Fatalf("Flush failed: %v", err)
+		}
+	}
+
+	// We'll make sure to clean up the connections at the end of the
+	// benchmark.
+	b.Cleanup(func() {
+		localConn.Close()
+		remoteConn.Close()
+	})
+}
diff --git a/brontide/conn.go b/brontide/conn.go
index e83c8a5a89c..bf30e2659ee 100644
--- a/brontide/conn.go
+++ b/brontide/conn.go
@@ -287,3 +287,10 @@ func (c *Conn) RemotePub() *btcec.PublicKey {
 func (c *Conn) LocalPub() *btcec.PublicKey {
 	return c.noise.localStatic.PubKey()
 }
+
+// ClearPendingSend drops references to the next header and body buffers and
+// returns any pooled buffers back to their respective pools so that the memory
+// can be reused.
+func (c *Conn) ClearPendingSend() {
+	c.noise.releaseBuffers()
+}
diff --git a/brontide/noise.go b/brontide/noise.go
index a55806399a1..a1b7cd4dd94 100644
--- a/brontide/noise.go
+++ b/brontide/noise.go
@@ -8,6 +8,7 @@ import (
 	"fmt"
 	"io"
 	"math"
+	"sync"
 	"time"
 
 	"github.com/btcsuite/btcd/btcec/v2"
@@ -35,6 +36,10 @@ const (
 	// header and it's MAC.
 	encHeaderSize = lengthHeaderSize + macSize
 
+	// maxMessageSize is the maximum size of an encrypted message including
+	// the MAC. This is the max payload (65535) plus the MAC size (16).
+	maxMessageSize = math.MaxUint16 + macSize
+
 	// keyRotationInterval is the number of messages sent on a single
 	// cipher stream before the keys are rotated forwards.
 	keyRotationInterval = 1000
@@ -65,9 +70,25 @@ var (
 	ephemeralGen = func() (*btcec.PrivateKey, error) {
 		return btcec.NewPrivateKey()
 	}
-)
 
-// TODO(roasbeef): free buffer pool?
+	// headerBufferPool is a pool for encrypted header buffers.
+	headerBufferPool = &sync.Pool{
+		New: func() interface{} {
+			b := make([]byte, 0, encHeaderSize)
+			return &b
+		},
+	}
+
+	// bodyBufferPool is a pool for encrypted message body buffers.
+	bodyBufferPool = &sync.Pool{
+		New: func() interface{} {
+			// Allocate max size to avoid reallocation.
+			// maxMessageSize already includes the MAC.
+			b := make([]byte, 0, maxMessageSize)
+			return &b
+		},
+	}
+)
 
 // ecdh performs an ECDH operation between pub and priv. The returned value is
 // the sha256 of the compressed shared point.
@@ -87,6 +108,9 @@ type cipherState struct {
 	// TODO(roasbeef): this should actually be 96 bit
 	nonce uint64
 
+	// nonceBuffer is a reusable buffer for the nonce to avoid allocations.
+	nonceBuffer [12]byte
+
 	// secretKey is the shared symmetric key which will be used to
 	// instantiate the cipher.
 	//
@@ -113,10 +137,12 @@ func (c *cipherState) Encrypt(associatedData, cipherText, plainText []byte) []byte {
 		}
 	}()
 
-	var nonce [12]byte
-	binary.LittleEndian.PutUint64(nonce[4:], c.nonce)
+	// Write the nonce counter to the buffer (bytes 4-11).
+	binary.LittleEndian.PutUint64(c.nonceBuffer[4:], c.nonce)
 
-	return c.cipher.Seal(cipherText, nonce[:], plainText, associatedData)
+	return c.cipher.Seal(
+		cipherText, c.nonceBuffer[:], plainText, associatedData,
+	)
 }
 
 // Decrypt attempts to decrypt the passed ciphertext observing the specified
@@ -131,10 +157,12 @@ func (c *cipherState) Decrypt(associatedData, plainText, cipherText []byte) ([]byte, error) {
 		}
 	}()
 
-	var nonce [12]byte
-	binary.LittleEndian.PutUint64(nonce[4:], c.nonce)
+	// Write the nonce counter to the buffer (bytes 4-11).
+	binary.LittleEndian.PutUint64(c.nonceBuffer[4:], c.nonce)
 
-	return c.cipher.Open(plainText, nonce[:], cipherText, associatedData)
+	return c.cipher.Open(
+		plainText, c.nonceBuffer[:], cipherText, associatedData,
+	)
 }
 
 // InitializeKey initializes the secret key and AEAD cipher scheme based off of
@@ -374,6 +402,9 @@ type Machine struct {
 	// (of the next ciphertext), followed by a 16 byte MAC.
 	nextCipherHeader [encHeaderSize]byte
 
+	// pktLenBuffer is a reusable buffer for encoding the packet length.
+	pktLenBuffer [lengthHeaderSize]byte
+
 	// nextHeaderSend holds a reference to the remaining header bytes to
 	// write out for a pending message. This allows us to tolerate timeout
 	// errors that cause partial writes.
@@ -383,6 +414,14 @@ type Machine struct {
 	// out for a pending message. This allows us to tolerate timeout errors
 	// that cause partial writes.
 	nextBodySend []byte
+
+	// pooledHeaderBuf is the pooled buffer used for the header, which we
+	// need to track so we can return it to the pool when done.
+	pooledHeaderBuf *[]byte
+
+	// pooledBodyBuf is the pooled buffer used for the body, which we need
+	// to track so we can return it to the pool when done.
+	pooledBodyBuf *[]byte
 }
 
 // NewBrontideMachine creates a new instance of the brontide state-machine. If
@@ -740,14 +779,35 @@ func (b *Machine) WriteMessage(p []byte) error {
 	// NOT include the MAC.
 	fullLength := uint16(len(p))
 
-	var pktLen [2]byte
-	binary.BigEndian.PutUint16(pktLen[:], fullLength)
+	binary.BigEndian.PutUint16(b.pktLenBuffer[:], fullLength)
 
-	// First, generate the encrypted+MAC'd length prefix for the packet.
-	b.nextHeaderSend = b.sendCipher.Encrypt(nil, nil, pktLen[:])
+	headerBufInterface := headerBufferPool.Get()
+	headerBuf, ok := headerBufInterface.(*[]byte)
+	if !ok {
+		b.releaseBuffers()
+		return fmt.Errorf("headerBufferPool returned unexpected "+
+			"type: %T", headerBufInterface)
+	}
+	b.pooledHeaderBuf = headerBuf
 
-	// Finally, generate the encrypted packet itself.
-	b.nextBodySend = b.sendCipher.Encrypt(nil, nil, p)
+	bodyBufInterface := bodyBufferPool.Get()
+	bodyBuf, ok := bodyBufInterface.(*[]byte)
+	if !ok {
+		b.releaseBuffers()
+		return fmt.Errorf("bodyBufferPool returned unexpected "+
+			"type: %T", bodyBufInterface)
+	}
+	b.pooledBodyBuf = bodyBuf
+
+	// First, generate the encrypted+MAC'd length prefix for the packet. We
+	// pass our pooled buffer as the cipherText (dst) parameter.
+	b.nextHeaderSend = b.sendCipher.Encrypt(
+		nil, *b.pooledHeaderBuf, b.pktLenBuffer[:],
+	)
+
+	// Finally, generate the encrypted packet itself. We pass our pooled
+	// buffer as the cipherText (dst) parameter.
+	b.nextBodySend = b.sendCipher.Encrypt(nil, *b.pooledBodyBuf, p)
 
 	return nil
 }
@@ -824,9 +884,34 @@ func (b *Machine) Flush(w io.Writer) (int, error) {
 		}
 	}
 
+	// If both header and body have been fully flushed, release the pooled
+	// buffers back to their pools.
+	if len(b.nextHeaderSend) == 0 && len(b.nextBodySend) == 0 {
+		b.releaseBuffers()
+	}
+
 	return nn, nil
 }
 
+// releaseBuffers returns the pooled buffers back to their respective pools
+// and clears the references.
+func (b *Machine) releaseBuffers() {
+	if b.pooledHeaderBuf != nil {
+		*b.pooledHeaderBuf = (*b.pooledHeaderBuf)[:0]
+		headerBufferPool.Put(b.pooledHeaderBuf)
+		b.pooledHeaderBuf = nil
+	}
+
+	if b.pooledBodyBuf != nil {
+		*b.pooledBodyBuf = (*b.pooledBodyBuf)[:0]
+		bodyBufferPool.Put(b.pooledBodyBuf)
+		b.pooledBodyBuf = nil
+	}
+
+	b.nextHeaderSend = nil
+	b.nextBodySend = nil
+}
+
 // ReadMessage attempts to read the next message from the passed io.Reader. In
 // the case of an authentication error, a non-nil error is returned.
 func (b *Machine) ReadMessage(r io.Reader) ([]byte, error) {
diff --git a/docs/benchmark_perf_loop.md b/docs/benchmark_perf_loop.md
new file mode 100644
index 00000000000..1438553fa81
--- /dev/null
+++ b/docs/benchmark_perf_loop.md
@@ -0,0 +1,378 @@
+# The Go Performance Optimization Loop: From Benchmarks to Zero Allocations
+
+When optimizing Go code for performance, particularly in hot paths like
+cryptographic operations or protocol handling, the journey from identifying
+bottlenecks to achieving zero-allocation code follows a well-defined
+methodology. This document walks through the complete optimization loop using
+Go's built-in tooling, demonstrating how to systematically eliminate
+allocations and improve performance.
+
+## Understanding the Performance Baseline
+
+The first step in any optimization effort is establishing a measurable
+baseline. Go's benchmark framework provides the foundation for this
+measurement. When writing benchmarks for allocation-sensitive code, always
+include a call to `b.ReportAllocs()` before `b.ResetTimer()`. This ensures the
+benchmark reports both timing and allocation statistics without including
+setup costs in the measurements.
+
+Consider a benchmark that exercises a cryptographic write path with the largest
+possible message size to stress test allocations:
+
+```go
+func BenchmarkWriteMessage(b *testing.B) {
+    // Setup code here...
+
+    b.ReportAllocs() // Essential for tracking allocations
+    b.ResetTimer()
+
+    for i := 0; i < b.N; i++ {
+        // Hot path being measured
+    }
+}
+```
+
+Running the benchmark with `go test -bench=BenchmarkWriteMessage -benchmem
+-count=10` provides statistical confidence through multiple runs. The
+`-benchmem` flag is redundant if you've called `b.ReportAllocs()`, but it
+doesn't hurt to include it explicitly.
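+
+A run of this benchmark prints one line per benchmark function; the figures
+below are illustrative (chosen to be consistent with the before-optimization
+numbers that appear later in this document), not measured output:
+
+```
+BenchmarkWriteMessage-16    23427    50342 ns/op    73788 B/op    5 allocs/op
+PASS
+```
+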
+The output reveals three critical metrics: nanoseconds per operation, bytes
+allocated per operation, and the number of distinct allocations per operation.
+
+## Profiling Memory Allocations
+
+Once you have baseline measurements showing undesirable allocations, the next
+phase involves profiling to understand where these allocations originate.
+Generate memory profiles during benchmark execution using:
+
+```
+go test -bench=BenchmarkWriteMessage -memprofile=mem.prof -cpuprofile=cpu.prof -count=1
+```
+
+The resulting profile can be analyzed through several lenses. To see which
+functions allocate the most memory by total bytes, use:
+`go tool pprof -alloc_space -top mem.prof`.
+
+However, for understanding allocation frequency rather than size, `go tool
+pprof -alloc_objects -top mem.prof` often provides more actionable insights,
+especially when hunting small but frequent allocations.
+
+Here's what the allocation object analysis might reveal:
+
+```
+$ go tool pprof -alloc_objects -top mem.prof | head -20
+File: brontide.test
+Type: alloc_objects
+Time: Aug 30, 2024 at 2:07pm (WEST)
+Showing nodes accounting for 39254, 100% of 39272 total
+Dropped 32 nodes (cum <= 196)
+      flat  flat%   sum%        cum   cum%
+     32768 83.44% 83.44%      32768 83.44%  github.com/lightningnetwork/lnd/brontide.(*cipherState).Encrypt
+      5461 13.91% 97.34%       5461 13.91%  runtime.acquireSudog
+      1025  2.61%   100%       1025  2.61%  runtime.allocm
+```
+
+This output immediately shows that `cipherState.Encrypt` is responsible for 83%
+of allocations by count, focusing our investigation.
+
+The most powerful profiling technique involves examining allocations at the
+source line level. Running `go tool pprof -list 'FunctionName' mem.prof` shows
+exactly which lines within a function trigger heap allocations:
+
+```
+$ go tool pprof -list 'cipherState.*Encrypt' mem.prof
+Total: 8.73MB
+ROUTINE ======================== github.com/lightningnetwork/lnd/brontide.(*cipherState).Encrypt
+  512.01kB   512.01kB (flat, cum)  5.73% of Total
+         .          .    111:func (c *cipherState) Encrypt(associatedData, cipherText, plainText []byte) []byte {
+         .          .    112:	defer func() {
+         .          .    113:		c.nonce++
+         .          .    114:
+         .          .    115:		if c.nonce == keyRotationInterval {
+         .          .    116:			c.rotateKey()
+         .          .    117:		}
+         .          .    118:	}()
+         .          .    119:
+  512.01kB   512.01kB    120:	var nonce [12]byte
+         .          .    121:	binary.LittleEndian.PutUint64(nonce[4:], c.nonce)
+         .          .    122:
+         .          .    123:	return c.cipher.Seal(cipherText, nonce[:], plainText, associatedData)
+```
+
+This granular view reveals that line 120, a seemingly innocent stack array
+declaration, is allocating 512KB total across all benchmark iterations.
+
+## CPU Profiling for Hot Spots
+
+While memory allocations often dominate optimization efforts, CPU profiling
+reveals where computational time is spent.
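+
+One practical note before digging into the numbers: the default benchmark
+duration of roughly one second can yield too few samples for a stable CPU
+profile. The standard `-benchtime` flag of `go test` widens the measurement
+window, for example:
+
+```
+go test -bench=BenchmarkWriteMessage -cpuprofile=cpu.prof -benchtime=5s
+```
+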
+The CPU profile generated alongside the memory profile provides complementary
+insights:
+
+```
+$ go tool pprof -top cpu.prof | head -15
+File: brontide.test
+Type: cpu
+Time: Aug 30, 2024 at 2:07pm (WEST)
+Duration: 1.8s, Total samples = 1.71s (94.40%)
+Showing nodes accounting for 1.65s, 96.49% of 1.71s total
+      flat  flat%   sum%        cum   cum%
+     0.51s 29.82% 29.82%      0.51s 29.82%  vendor/golang.org/x/crypto/chacha20poly1305.(*chacha20poly1305).sealGeneric
+     0.28s 16.37% 46.20%      0.28s 16.37%  vendor/golang.org/x/crypto/internal/poly1305.updateGeneric
+     0.24s 14.04% 60.23%      0.24s 14.04%  vendor/golang.org/x/crypto/chacha20.(*Cipher).XORKeyStream
+     0.19s 11.11% 71.35%      0.19s 11.11%  runtime.memmove
+     0.12s  7.02% 78.36%      0.86s 50.29%  github.com/lightningnetwork/lnd/brontide.(*cipherState).Encrypt
+```
+
+This profile shows that cryptographic operations dominate CPU usage, which is
+expected. However, note the presence of `runtime.memmove` at 11%; this often
+indicates unnecessary copying that could be eliminated through careful buffer
+management.
+
+For line-level CPU analysis of a specific function:
+
+```
+$ go tool pprof -list 'WriteMessage' cpu.prof
+Total: 1.71s
+ROUTINE ======================== github.com/lightningnetwork/lnd/brontide.(*Machine).WriteMessage
+      10ms      1.21s (flat, cum) 70.76% of Total
+         .          .    734:func (b *Machine) WriteMessage(p []byte) error {
+         .          .    735:	if len(p) > math.MaxUint16 {
+         .          .    736:		return ErrMaxMessageLengthExceeded
+         .          .    737:	}
+         .          .    738:
+         .       10ms    739:	if len(b.nextHeaderSend) > 0 || len(b.nextBodySend) > 0 {
+         .          .    740:		return ErrMessageNotFlushed
+         .          .    741:	}
+         .          .    742:
+      10ms       10ms    743:	fullLength := uint16(len(p))
+         .          .    744:	var pktLen [2]byte
+         .       10ms    745:	binary.BigEndian.PutUint16(pktLen[:], fullLength)
+         .          .    746:
+         .      580ms    747:	b.nextHeaderSend = b.sendCipher.Encrypt(nil, nil, pktLen[:])
+         .      600ms    748:	b.nextBodySend = b.sendCipher.Encrypt(nil, nil, p)
+```
+
+This shows that the two `Encrypt` calls consume virtually all the CPU time in
+`WriteMessage`, confirming that cryptographic operations are the bottleneck
+rather than the message handling logic itself.
+
+## Understanding Escape Analysis
+
+When the profiler indicates that seemingly stack-local variables are being heap
+allocated, escape analysis becomes your next investigative tool. The Go
+compiler's escape analysis determines whether variables can remain on the stack
+or must be moved to the heap. Variables escape to the heap when their lifetime
+extends beyond the function that creates them or when the compiler cannot prove
+they remain local.
+
+To see the compiler's escape analysis decisions, build with verbose flags:
+
+```
+go build -gcflags="-m" ./...
+```
+
+For more detailed output including the reasons for escape, use `-m=2`. The
+output reveals escape flows, showing exactly why variables move to the heap.
+When investigating specific escapes, you can grep for the variable in question:
+
+```
+$ go build -gcflags="-m=2" ./... 2>&1 | grep -A2 -B2 "nonce escapes"
+./noise.go:183:17: &errors.errorString{...} does not escape
+./noise.go:183:17: new(chacha20poly1305.chacha20poly1305) escapes to heap
+./noise.go:120:6: nonce escapes to heap:
+./noise.go:120:6:   flow: {heap} = &nonce:
+./noise.go:120:6:     from nonce (address-of) at ./noise.go:123:40
+--
+./noise.go:469:21: &keychain.PrivKeyECDH{...} escapes to heap
+./noise.go:483:40: []byte{} escapes to heap
+./noise.go:138:6: nonce escapes to heap:
+./noise.go:138:6:   flow: {heap} = &nonce:
+./noise.go:138:6:     from nonce (address-of) at ./noise.go:141:39
+```
+
+This output shows the exact flow analysis: the nonce array escapes because its
+address is taken when creating a slice (`nonce[:]`) and passed to a function
+that the compiler cannot fully analyze.
+
+Common causes include passing pointers to interfaces, storing references in
+heap-allocated structures, or passing slices of stack arrays to functions that
+might retain them. A particularly instructive example is the seemingly innocent
+pattern of passing a stack array to a function:
+
+```go
+var nonce [12]byte
+binary.LittleEndian.PutUint64(nonce[4:], counter)
+return cipher.Seal(ciphertext, nonce[:], plaintext, nil)
+```
+
+Here, `nonce[:]` creates a slice backed by the stack array, but if the compiler
+cannot prove that `cipher.Seal` won't retain a reference to this slice, the
+entire array escapes to the heap.
+
+## The Optimization Strategy
+
+Armed with profiling data and escape analysis insights, the optimization phase
+begins. The general strategy for eliminating allocations follows a predictable
+pattern: move temporary buffers from function scope to longer-lived structures,
+typically as fields in the enclosing type. This transformation changes
+allocation from per-operation to per-instance.
+
+For the nonce example above, the optimization involves adding a buffer field to
+the containing struct:
+
+```go
+type cipherState struct {
+    // ... other fields ...
+    nonceBuffer [12]byte // Reusable buffer to avoid allocations
+}
+
+func (c *cipherState) Encrypt(...) []byte {
+    binary.LittleEndian.PutUint64(c.nonceBuffer[4:], c.nonce)
+    return c.cipher.Seal(ciphertext, c.nonceBuffer[:], plaintext, nil)
+}
+```
+
+This pattern extends to any temporary buffer. When dealing with variable-sized
+data up to a known maximum, pre-allocate buffers at that maximum size and
+re-slice them as needed. The key insight is re-slicing the buffer to zero
+length so that subsequent appends reuse its existing capacity instead of
+allocating:
+
+```go
+// Pre-allocated: var buffer [maxSize]byte
+
+// Creating a zero-length slice with full capacity for append:
+slice := buffer[:0] // length=0, capacity=maxSize
+```
+
+## Verification and Iteration
+
+After implementing optimizations, the cycle returns to benchmarking. Run the
+same benchmark to measure improvement, but don't stop at the aggregate numbers.
+Generate new profiles to verify that specific allocations have been eliminated
+and to identify any remaining allocation sites.
+
+The benchstat tool provides statistical comparison between runs:
+
+```
+go test -bench=BenchmarkWriteMessage -count=10 > old.txt
+# Make optimizations
+go test -bench=BenchmarkWriteMessage -count=10 > new.txt
+benchstat old.txt new.txt
+```
+
+This comparison reveals not just whether performance improved, but whether the
+improvement is statistically significant.
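+
+benchstat is not part of the standard Go distribution; it lives in the x/perf
+module and can be installed with:
+
+```
+go install golang.org/x/perf/cmd/benchstat@latest
+```
+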
+A typical benchstat output after successful optimization looks like:
+
+```
+goos: darwin
+goarch: arm64
+pkg: github.com/lightningnetwork/lnd/brontide
+cpu: Apple M4 Max
+                 │   old.txt   │              new.txt               │
+                 │   sec/op    │    sec/op     vs base              │
+WriteMessage-16    50.34µ ± 1%    46.48µ ± 0%  -7.68% (p=0.000 n=10)
+
+                 │    old.txt     │             new.txt               │
+                 │      B/op      │    B/op      vs base              │
+WriteMessage-16   73788.000 ± 0%   2.000 ± 0%  -100.00% (p=0.000 n=10)
+
+                 │  old.txt   │             new.txt                │
+                 │ allocs/op  │ allocs/op    vs base               │
+WriteMessage-16   5.000 ± 0%   0.000 ± 0%  -100.00% (p=0.000 n=10)
+```
+
+The key metrics to examine are:
+
+- The percentage change (vs base column) showing the magnitude of improvement
+- The p-value (p=0.000) indicating statistical significance; values below 0.05
+  suggest real improvements rather than noise
+- The variance (± percentages) showing consistency across runs
+
+This output confirms both a 7.68% speed improvement and complete elimination of
+allocations, with high statistical confidence.
+
+If allocations remain, the cycle continues. Profile again, identify the source,
+understand why the allocation occurs through escape analysis, and apply the
+appropriate optimization pattern. Each iteration should show measurable
+progress toward the goal of zero allocations in the hot path.
+
+## Advanced Techniques
+
+When standard profiling doesn't reveal the allocation source, more advanced
+techniques come into play. Memory profiling with different granularities can
+help. Instead of looking at total allocations, examine the profile with `go
+tool pprof -sample_index=alloc_objects mem.prof` to focus on allocation count
+rather than size. This distinction matters when hunting for small, frequent
+allocations that might not show up prominently in byte-focused views.
+
+Additional pprof commands that prove invaluable during optimization:
+
+```bash
+# Interactive mode for exploring the profile
+go tool pprof mem.prof
+(pprof) top10        # Show top 10 memory consumers
+(pprof) list regexp  # List functions matching regexp
+(pprof) web          # Open visual graph in browser
+
+# Explore the profile in the web UI, including a flame graph view
+go tool pprof -http=:8080 mem.prof
+
+# Compare two profiles directly
+go tool pprof -base=old.prof new.prof
+
+# Show allocations only from specific packages
+go tool pprof -focus=github.com/lightningnetwork/lnd/brontide mem.prof
+
+# Switch between cumulative allocation and live-heap views
+go tool pprof -alloc_space mem.prof
+go tool pprof -inuse_space mem.prof
+```
+
+When dealing with elusive allocations, checking what might be escaping to heap
+can be done more surgically:
+
+```bash
+# Check specific function or type for escapes
+go build -gcflags="-m" 2>&1 | grep -E "(YourType|yourFunc)"
+
+# See all heap allocations in a package
+go build -gcflags="-m" 2>&1 | grep "moved to heap"
+
+# Check which variables are confirmed to stay on the stack
+go build -gcflags="-m=2" 2>&1 | grep "does not escape"
+```
+
+For particularly elusive allocations, instrumenting the code with runtime
+memory statistics can provide real-time feedback:
+
+```go
+var m runtime.MemStats
+runtime.ReadMemStats(&m)
+before := m.Alloc
+// Operation being measured
+runtime.ReadMemStats(&m)
+allocated := m.Alloc - before
+```
+
+While this approach adds overhead and shouldn't be used in production, it can
+help isolate allocations to specific code sections during development.
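+
+The snippet above can be wrapped into a small self-contained helper. The
+sketch below is illustrative (the `measureAllocs` name and the `sink` variable
+are our own, not a standard API); it uses `TotalAlloc`, which only ever grows,
+so the measurement is not distorted by a garbage collection running in the
+middle of the operation:
+
+```go
+package main
+
+import (
+	"fmt"
+	"runtime"
+)
+
+// sink keeps the allocation below alive so the compiler cannot optimize
+// it away or keep it on the stack.
+var sink []byte
+
+// measureAllocs reports how many bytes fn allocates. TotalAlloc is
+// monotonic, so unlike Alloc it is unaffected by GCs that run during fn.
+// Allocations from other goroutines are included, so keep the program
+// otherwise quiet while measuring.
+func measureAllocs(fn func()) uint64 {
+	var before, after runtime.MemStats
+	runtime.ReadMemStats(&before)
+	fn()
+	runtime.ReadMemStats(&after)
+	return after.TotalAlloc - before.TotalAlloc
+}
+
+func main() {
+	n := measureAllocs(func() {
+		sink = make([]byte, 1<<20) // deliberately allocate 1 MiB
+	})
+	fmt.Printf("allocated %d bytes\n", n)
+}
+```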
+
+## The Zero-Allocation Goal
+
+Achieving zero allocations in hot paths represents more than just a
+performance optimization. It provides predictable latency, reduces garbage
+collection pressure, and improves overall system behavior under load. In
+systems handling thousands of operations per second, the difference between
+five allocations per operation and zero can mean the difference between smooth
+operation and periodic latency spikes during garbage collection.
+
+The journey from initial benchmark to zero-allocation code demonstrates the
+power of Go's built-in tooling. By systematically applying the
+benchmark-profile-optimize loop, even complex code paths can be transformed
+into allocation-free implementations. The key lies not in guessing or
+premature optimization, but in measuring, understanding, and methodically
+addressing each allocation source.
+
+It's best to focus optimization efforts on true hot paths identified through
+production profiling or realistic load testing. The techniques described here
+provide the tools to achieve zero-allocation code when it matters, but the
+judgment of when to apply them remains a critical engineering decision.
diff --git a/peer/brontide.go b/peer/brontide.go
index 57e340fb0e8..1c9073f199c 100644
--- a/peer/brontide.go
+++ b/peer/brontide.go
@@ -19,6 +19,7 @@ import (
 	"github.com/btcsuite/btcd/txscript"
 	"github.com/btcsuite/btcd/wire"
 	"github.com/btcsuite/btclog/v2"
+	"github.com/lightningnetwork/lnd/brontide"
 	"github.com/lightningnetwork/lnd/buffer"
 	"github.com/lightningnetwork/lnd/chainntnfs"
 	"github.com/lightningnetwork/lnd/channeldb"
@@ -2705,6 +2706,13 @@ out:
 			goto retry
 		}
 
+		// Message has either been successfully sent or an
+		// unrecoverable error occurred. Either way, we can
+		// free the memory used to store the message.
+		if bConn, ok := p.cfg.Conn.(*brontide.Conn); ok {
+			bConn.ClearPendingSend()
+		}
+
 		// The write succeeded, reset the idle timer to prevent
 		// us from disconnecting the peer.
 		if !idleTimer.Stop() {