derp,types,util: use bufio Peek+Discard for allocation-free fast reads (#19067)

Replace byte-at-a-time ReadByte loops with Peek+Discard in the DERP
read path. Peek returns a slice into bufio's internal buffer without
allocating, and Discard advances the read pointer without copying.

Introduce util/bufiox with a BufferedReader interface and ReadFull
helper that uses Peek+copy+Discard as an allocation-free alternative
to io.ReadFull.

  - derp.ReadFrameHeader: replace 5× ReadByte with Peek(5)+Discard(5),
    reading the frame type and length directly from the peeked slice.
    Remove now-unused readUint32 helper.

    name                  old ns/op  new ns/op  speedup
    ReadFrameHeader-8     24.2       12.4       ~2x
    (0 allocs/op in both)

  - key.NodePublic.ReadRawWithoutAllocating: replace 32× ReadByte with
    bufiox.ReadFull. Addresses the "Dear future" comment about switching
    away from byte-at-a-time reads once a non-escaping alternative exists.

    name                              old ns/op  new ns/op  speedup
    NodeReadRawWithoutAllocating-8    140        43.6       ~3.2x
    (0 allocs/op in both)

  - derpserver.handleFramePing: replace io.ReadFull with bufiox.ReadFull.

Updates tailscale/corp#38509

Signed-off-by: Mike O'Driscoll <mikeo@tailscale.com>
This commit is contained in:
Mike O'Driscoll
2026-03-24 10:52:20 -04:00
committed by GitHub
parent 1d0fde6fc2
commit 1403920367
17 changed files with 231 additions and 47 deletions
+2 -5
View File
@@ -141,15 +141,12 @@ func (r nopRead) Read(p []byte) (int, error) {
return len(p), nil
}
var sinkU32 uint32
func BenchmarkReadUint32(b *testing.B) {
func BenchmarkReadFrameHeader(b *testing.B) {
r := bufio.NewReader(nopRead{})
var err error
b.ReportAllocs()
b.ResetTimer()
for range b.N {
sinkU32, err = readUint32(r)
_, _, err := ReadFrameHeader(r)
if err != nil {
b.Fatal(err)
}
+6 -23
View File
@@ -183,21 +183,6 @@ func writeUint32(bw *bufio.Writer, v uint32) error {
return nil
}
// readUint32 pulls four bytes from br and decodes them with bin.Uint32.
//
// The bytes are fetched one ReadByte call at a time on purpose: doing it
// this way keeps the scratch array from escaping, and that saving is
// worth more than the cost of the extra calls.
//
// The first error returned by ReadByte is passed back unchanged, together
// with a zero value.
func readUint32(br *bufio.Reader) (uint32, error) {
	var scratch [4]byte
	for idx := 0; idx < len(scratch); idx++ {
		next, err := br.ReadByte()
		if err != nil {
			return 0, err
		}
		scratch[idx] = next
	}
	return bin.Uint32(scratch[:]), nil
}
// ReadFrameTypeHeader reads a frame header from br and
// verifies that the frame type matches wantType.
//
@@ -213,18 +198,16 @@ func ReadFrameTypeHeader(br *bufio.Reader, wantType FrameType) (frameLen uint32,
return frameLen, err
}
// ReadFrameHeader reads the header of a DERP frame,
// reading 5 bytes from br.
// ReadFrameHeader reads a DERP frame header ([FrameHeaderLen] bytes) from br.
// It uses Peek+Discard to read directly from bufio's internal buffer
// without copying or allocating.
//
// It returns the frame type (the first header byte) and the frame length
// (the uint32 that follows it). Any error from the underlying read is
// returned unchanged with zero values for the other results.
//
// NOTE(review): the body below shows two implementations interleaved with
// no +/- markers. Going by the commit message, the ReadByte/readUint32
// lines look like the version being removed and the Peek/Discard lines
// like its replacement — confirm against the real diff.
func ReadFrameHeader(br *bufio.Reader) (t FrameType, frameLen uint32, err error) {
// ReadByte variant: the frame type is consumed as a single byte.
tb, err := br.ReadByte()
// Peek variant: view FrameHeaderLen bytes of the reader's buffer; Peek
// itself does not advance the reader.
hdr, err := br.Peek(FrameHeaderLen)
if err != nil {
return 0, 0, err
}
// ReadByte variant: the frame length is read next via readUint32.
frameLen, err = readUint32(br)
if err != nil {
return 0, 0, err
}
return FrameType(tb), frameLen, nil
// Peek variant: Discard is deferred, so the return expressions below read
// hdr[0] and hdr[1:FrameHeaderLen] before the reader moves past the header.
// Discard's result is not checked here.
defer br.Discard(FrameHeaderLen)
return FrameType(hdr[0]), bin.Uint32(hdr[1:FrameHeaderLen]), nil
}
// readFrame reads a frame header and then reads its payload into
+59
View File
@@ -34,6 +34,65 @@ type (
Client = derp.Client
)
// TestReadFrameHeader feeds hand-built 5-byte headers to
// derp.ReadFrameHeader and checks the decoded frame type and length, then
// verifies that a call performs no allocations.
func TestReadFrameHeader(t *testing.T) {
	type headerCase struct {
		name     string
		input    [5]byte
		wantType derp.FrameType
		wantLen  uint32
	}

	// Each input is one type byte followed by four length bytes.
	cases := []headerCase{
		{
			name:     "SendPacket",
			wantType: derp.FrameSendPacket,
			wantLen:  1024,
			input:    [5]byte{byte(derp.FrameSendPacket), 0x00, 0x00, 0x04, 0x00},
		},
		{
			name:     "KeepAlive",
			wantType: derp.FrameKeepAlive,
			wantLen:  0,
			input:    [5]byte{byte(derp.FrameKeepAlive), 0x00, 0x00, 0x00, 0x00},
		},
		{
			name:     "MaxLen",
			wantType: derp.FrameRecvPacket,
			wantLen:  0xffffffff,
			input:    [5]byte{byte(derp.FrameRecvPacket), 0xff, 0xff, 0xff, 0xff},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			src := bufio.NewReader(bytes.NewReader(tc.input[:]))
			ft, n, err := derp.ReadFrameHeader(src)
			if err != nil {
				t.Fatalf("ReadFrameHeader: %v", err)
			}
			if ft != tc.wantType {
				t.Errorf("type = %v, want %v", ft, tc.wantType)
			}
			if n != tc.wantLen {
				t.Errorf("len = %v, want %v", n, tc.wantLen)
			}
		})
	}

	// Allocation check: the readers are created once, outside the measured
	// function, and rewound at the start of every run so each call sees the
	// same zero-filled input.
	zeros := make([]byte, 4096)
	under := bytes.NewReader(zeros)
	buffered := bufio.NewReader(under)
	allocs := testing.AllocsPerRun(1000, func() {
		under.Reset(zeros)
		buffered.Reset(under)
		if _, _, err := derp.ReadFrameHeader(buffered); err != nil {
			t.Fatalf("ReadFrameHeader: %v", err)
		}
	})
	if allocs != 0 {
		t.Fatalf("ReadFrameHeader allocs = %f, want 0", allocs)
	}
}
func TestClientInfoUnmarshal(t *testing.T) {
for i, in := range map[string]struct {
json string
+3 -2
View File
@@ -52,6 +52,7 @@ import (
"tailscale.com/tstime/rate"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/util/bufiox"
"tailscale.com/util/ctxkey"
"tailscale.com/util/mak"
"tailscale.com/util/set"
@@ -1088,10 +1089,10 @@ func (c *sclient) handleFramePing(ft derp.FrameType, fl uint32) error {
// space for future extensibility, but not too much.
return fmt.Errorf("ping body too large: %v", fl)
}
_, err := io.ReadFull(c.br, m[:])
if err != nil {
if _, err := bufiox.ReadFull(c.br, m[:]); err != nil {
return err
}
var err error
if extra := int64(fl) - int64(len(m)); extra > 0 {
_, err = io.CopyN(io.Discard, c.br, extra)
}