Files
2024-09-19 21:38:24 -04:00

635 lines
16 KiB
Go

package perf
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"math"
"os"
"syscall"
"testing"
"time"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/testutils"
"github.com/cilium/ebpf/internal/testutils/fdtrace"
"github.com/cilium/ebpf/internal/unix"
qt "github.com/frankban/quicktest"
)
// readTimeout is how long TestPause waits for a reader notification before
// it decides that none is going to arrive.
var readTimeout = 250 * time.Millisecond
// TestMain hands the whole test run over to fdtrace.TestMain.
//
// NOTE(review): presumably this is what lets the test binary detect leaked
// file descriptors; confirm against the fdtrace package.
func TestMain(m *testing.M) {
fdtrace.TestMain(m)
}
// TestPerfReader writes a single 5 byte sample, reads it back, and then
// verifies that a further Read on the drained reader runs into the deadline.
func TestPerfReader(t *testing.T) {
	array := perfEventArray(t)

	reader, err := NewReader(array, 4096)
	if err != nil {
		t.Fatal(err)
	}
	defer reader.Close()

	outputSamples(t, array, 5)
	checkRecord(t, reader)

	// The only sample has been consumed, so the next Read has to time out.
	deadline := time.Now().Add(4 * time.Millisecond)
	reader.SetDeadline(deadline)

	_, err = reader.Read()
	timedOut := errors.Is(err, os.ErrDeadlineExceeded)
	qt.Assert(t, timedOut, qt.IsTrue, qt.Commentf("expected os.ErrDeadlineExceeded"))
}
// TestReaderSetDeadline sets a deadline that has already passed and checks
// that two consecutive Reads both report os.ErrDeadlineExceeded.
func TestReaderSetDeadline(t *testing.T) {
	array := perfEventArray(t)

	reader, err := NewReader(array, 4096)
	if err != nil {
		t.Fatal(err)
	}
	defer reader.Close()

	// A deadline one second in the past.
	reader.SetDeadline(time.Now().Add(-time.Second))

	// One failure message per Read, in the order the Reads are issued.
	failures := []string{
		"Expected os.ErrDeadlineExceeded from first Read, got:",
		"Expected os.ErrDeadlineExceeded from second Read, got:",
	}
	for _, msg := range failures {
		_, readErr := reader.Read()
		if errors.Is(readErr, os.ErrDeadlineExceeded) {
			continue
		}
		t.Error(msg, readErr)
	}
}
// outputSamples builds a program with outputSamplesProg and runs it once, so
// that one sample per entry in sampleSizes is submitted to events.
//
// The test is skipped if running the program is not supported, and fails if
// the run returns an error or the program's return value is not zero.
func outputSamples(tb testing.TB, events *ebpf.Map, sampleSizes ...byte) {
	// Attribute failures to the calling test rather than to this helper,
	// matching outputSamplesProg and checkRecord.
	tb.Helper()

	prog := outputSamplesProg(tb, events, sampleSizes...)

	ret, _, err := prog.Test(internal.EmptyBPFContext)
	testutils.SkipIfNotSupported(tb, err)
	if err != nil {
		tb.Fatal(err)
	}

	// The return value is interpreted as a negated errno.
	if errno := syscall.Errno(-int32(ret)); errno != 0 {
		tb.Fatal("Expected 0 as return value, got", errno)
	}
}
// outputSamplesProg creates a program which submits a series of samples to a PerfEventArray.
//
// The format of each sample is:
//
//	index:   0    1    2    3    ... size - 1
//	content: size id   0xff 0xff ... 0xff     [padding]
//
// id is the position of the sample in sampleSizes, truncated to one byte.
// padding is an implementation detail of the perf buffer and up to 7 bytes
// long. The contents are undefined.
//
// Every sample size must be at least 2, since the first two bytes carry the
// size and the id. The program is closed automatically when tb finishes.
func outputSamplesProg(tb testing.TB, events *ebpf.Map, sampleSizes ...byte) *ebpf.Program {
	tb.Helper()

	// Requires at least 4.9 (0515e5999a46 "bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type")
	testutils.SkipOnOldKernel(tb, "4.9", "perf events support")

	const bpfFCurrentCPU = 0xffffffff

	// Validate the sizes and find the largest one, which determines how much
	// stack the shared sample buffer needs.
	var largest byte
	for _, size := range sampleSizes {
		if size < 2 {
			tb.Fatalf("Sample size %d is too small to contain size and counter", size)
		}
		if largest < size {
			largest = size
		}
	}

	bufDwords := int(largest/8) + 1
	bufStart := int32(bufDwords * -8)

	// Stash the context in R9 and fill the stack buffer with 0xff bytes,
	// one double word at a time, starting just below the frame pointer.
	var insns asm.Instructions
	insns = append(insns,
		asm.LoadImm(asm.R0, ^int64(0), asm.DWord),
		asm.Mov.Reg(asm.R9, asm.R1),
	)
	lowest := int16(bufDwords) * -8
	for off := int16(-8); off >= lowest; off -= 8 {
		insns = append(insns, asm.StoreMem(asm.RFP, off, asm.R0, asm.DWord))
	}

	// Emit one perf_event_output call per requested sample. All samples
	// share the same buffer, only the first two bytes are rewritten.
	for id, size := range sampleSizes {
		insns = append(insns,
			// R1: the stashed context.
			asm.Mov.Reg(asm.R1, asm.R9),
			// R2: the map.
			asm.LoadMapPtr(asm.R2, events.FD()),
			// R3: the flags.
			asm.LoadImm(asm.R3, bpfFCurrentCPU, asm.DWord),
			// R4: start of the buffer.
			asm.Mov.Reg(asm.R4, asm.RFP),
			asm.Add.Imm(asm.R4, bufStart),
			// buffer[0] = size
			asm.StoreImm(asm.R4, 0, int64(size), asm.Byte),
			// buffer[1] = id
			asm.StoreImm(asm.R4, 1, int64(id&math.MaxUint8), asm.Byte),
			// R5: the number of bytes to submit.
			asm.Mov.Imm(asm.R5, int32(size)),
			asm.FnPerfEventOutput.Call(),
		)
	}

	// R0 is not touched between the final helper call and the return.
	insns = append(insns, asm.Return())

	spec := &ebpf.ProgramSpec{
		License:      "GPL",
		Type:         ebpf.XDP,
		Instructions: insns,
	}
	prog, err := ebpf.NewProgram(spec)
	if err != nil {
		tb.Fatal(err)
	}
	tb.Cleanup(func() { prog.Close() })

	return prog
}
// checkRecord reads one record from rd and validates it against the sample
// layout produced by outputSamplesProg: byte 0 holds the sample size, byte 1
// the sample id and the remaining bytes up to size are 0xff.
//
// It returns the id stored in the sample. Any violation fails the test.
func checkRecord(tb testing.TB, rd *Reader) (id int) {
	tb.Helper()

	rec, err := rd.Read()
	qt.Assert(tb, err, qt.IsNil)
	qt.Assert(tb, rec.CPU >= 0, qt.IsTrue, qt.Commentf("Record has invalid CPU number"))

	// Guard the indexing below: a record without a sample (for example a
	// lost sample record) or a truncated one must fail the test instead of
	// panicking.
	qt.Assert(tb, len(rec.RawSample) >= 2, qt.IsTrue, qt.Commentf("RawSample is too short to contain size and id"))

	size := int(rec.RawSample[0])
	qt.Assert(tb, size >= 2, qt.IsTrue, qt.Commentf("encoded size must cover the size and id bytes"))
	qt.Assert(tb, len(rec.RawSample) >= size, qt.IsTrue, qt.Commentf("RawSample is at least size bytes"))

	for i, v := range rec.RawSample[2:size] {
		qt.Assert(tb, v, qt.Equals, byte(0xff), qt.Commentf("filler at position %d should match", i+2))
	}

	// padding is ignored since its value is undefined.
	return int(rec.RawSample[1])
}
// TestPerfReaderLostSample overflows a one page ring buffer and checks that
// the reader reports the dropped event as a lost sample record.
func TestPerfReaderLostSample(t *testing.T) {
	// To generate a lost sample perf record:
	//
	// 1. Fill the perf ring buffer almost completely with large samples.
	//    The buffer is sized in number of pages, which are architecture dependent.
	//
	// 2. Write an extra large sample that doesn't fit in the space remaining.
	//
	// 3. Write a small sample that does fit.
	//    Lost sample records are generated opportunistically, when the kernel
	//    is writing an event and realizes that there were events lost previously.
	//
	// The event size needs to be selected so that, for any page size, there are at least
	// 48 bytes left in the perf ring page after filling it with a whole number of events:
	//
	//   - PERF_RECORD_LOST: 8 (perf_event_header) + 16 (PERF_RECORD_LOST)
	//
	//   - small sample: 8 (perf_event_header) + 4 (size) + 5 (payload) + 7 (padding to 64bits)
	//
	// By selecting an event size of the form 2^n + 2^(n+1) = 3 * 2^n, for any page size
	// 2^(n+m), m >= 0, the number of bytes left, x, after filling a page with a whole
	// number of events is:
	//
	//	x = eventSize * frac(pageSize / eventSize)
	//	  = 3 * 2^n * frac(2^(n+m) / (3 * 2^n))
	//	  = 3 * 2^n * frac(2^m / 3)
	//
	// frac(2^m / 3) is 1/3 for even m and 2/3 for odd m, therefore:
	//
	//	x = 2^n  or  x = 2^(n+1)
	//
	// Selecting n = 6, we have:
	//
	//	x = 64  or  x = 128, no matter the page size 2^(6+m)
	//
	//	event size = 2^6 + 2^7 = 192
	//
	// Accounting for perf headers, the large samples use a 180 byte payload:
	//
	//	8 (perf_event_header) + 4 (size) + 180 (payload)
	const eventSize = 192

	pageSize := os.Getpagesize()
	maxEvents := pageSize / eventSize

	if remainder := pageSize % eventSize; remainder != 64 && remainder != 128 {
		// Page size isn't 2^(6+m), m >= 0
		t.Fatal("unsupported page size:", pageSize)
	}

	// Fill the ring with the maximum number of large samples that will fit,
	// and generate a lost event by writing an additional one.
	sampleSizes := make([]byte, 0, maxEvents+2)
	for i := 0; i < maxEvents+1; i++ {
		sampleSizes = append(sampleSizes, 180)
	}

	// Generate a small event to trigger the lost record
	sampleSizes = append(sampleSizes, 5)

	events := perfEventArray(t)

	rd, err := NewReader(events, pageSize)
	if err != nil {
		t.Fatal(err)
	}
	defer rd.Close()

	outputSamples(t, events, sampleSizes...)

	// All records are already in the ring at this point. Bound the reads so
	// that a missing record fails the test instead of blocking it until the
	// global test timeout.
	rd.SetDeadline(time.Now().Add(time.Second))

	// One sample is dropped and replaced by a lost sample record, so the
	// number of records to read equals the number of samples written.
	lostRecords := 0
	for range sampleSizes {
		record, err := rd.Read()
		if err != nil {
			t.Fatal(err)
		}

		if len(record.RawSample) != 0 {
			// A regular sample.
			continue
		}

		if record.LostSamples != 1 {
			t.Fatal("Expected a record with LostSamples 1, got", record.LostSamples)
		}
		lostRecords++
	}

	// Without this check the test would pass if every sample fit into the
	// ring and nothing was lost at all.
	if lostRecords == 0 {
		t.Fatal("Expected a lost sample record, got none")
	}
}
// TestPerfReaderOverwritable fills an overwritable one page ring with one
// sample more than fits and checks that the records are read back with
// descending ids, starting at the id of the sample written last.
func TestPerfReaderOverwritable(t *testing.T) {
	const sampleSize = math.MaxUint8

	// Smallest buffer size.
	pageSize := os.Getpagesize()

	// Account for perf header (8) and size (4), align to 8 bytes as perf does.
	realSampleSize := internal.Align(sampleSize+8+4, 8)
	maxEvents := pageSize / realSampleSize

	// maxEvents samples fill the ring, the extra sample overwrites the
	// first one.
	var sampleSizes []byte
	for len(sampleSizes) < maxEvents+1 {
		sampleSizes = append(sampleSizes, sampleSize)
	}

	events := perfEventArray(t)

	opts := ReaderOptions{Overwritable: true}
	rd, err := NewReaderWithOptions(events, pageSize, opts)
	if err != nil {
		t.Fatal(err)
	}
	defer rd.Close()

	// Reading from an overwritable reader that is not paused is an error.
	_, err = rd.Read()
	qt.Assert(t, err, qt.ErrorIs, errMustBePaused)

	outputSamples(t, events, sampleSizes...)

	qt.Assert(t, rd.Pause(), qt.IsNil)
	rd.SetDeadline(time.Now())

	// Ids count down from maxEvents to 1; id 0 is the overwritten sample.
	for wantID := maxEvents; wantID > 0; wantID-- {
		gotID := checkRecord(t, rd)
		qt.Assert(t, gotID, qt.Equals, wantID)
	}
}
// TestPerfReaderOverwritableEmpty pauses an overwritable reader that never
// received a sample, checks that Read runs into the deadline and that the
// reader can be resumed afterwards.
func TestPerfReaderOverwritableEmpty(t *testing.T) {
	array := perfEventArray(t)

	opts := ReaderOptions{Overwritable: true}
	reader, err := NewReaderWithOptions(array, os.Getpagesize(), opts)
	if err != nil {
		t.Fatal(err)
	}
	defer reader.Close()

	if err := reader.Pause(); err != nil {
		t.Fatal(err)
	}

	// Nothing was written, so Read can only time out.
	reader.SetDeadline(time.Now().Add(4 * time.Millisecond))
	_, err = reader.Read()
	timedOut := errors.Is(err, os.ErrDeadlineExceeded)
	qt.Assert(t, timedOut, qt.IsTrue, qt.Commentf("expected os.ErrDeadlineExceeded"))

	if err := reader.Resume(); err != nil {
		t.Fatal(err)
	}
}
// TestPerfReaderClose checks that Close unblocks a concurrent Read, that
// Close may be called repeatedly and that Read fails on a closed reader.
func TestPerfReaderClose(t *testing.T) {
	array := perfEventArray(t)

	reader, err := NewReader(array, 4096)
	if err != nil {
		t.Fatal(err)
	}
	defer reader.Close()

	started := make(chan struct{})
	readErr := make(chan error, 1)
	go func() {
		// Signal that the goroutine is running, then block in Read.
		close(started)
		_, err := reader.Read()
		readErr <- err
	}()
	<-started

	// Close should interrupt Read
	if err := reader.Close(); err != nil {
		t.Fatal(err)
	}

	expired := time.After(time.Second)
	select {
	case <-readErr:
		// Read returned; its result is not inspected.
	case <-expired:
		t.Fatal("Close doesn't interrupt Read")
	}

	// And we should be able to call it multiple times
	if err := reader.Close(); err != nil {
		t.Fatal(err)
	}

	_, err = reader.Read()
	if err == nil {
		t.Fatal("Read on a closed PerfReader doesn't return an error")
	}
}
// TestCreatePerfEvent checks that createPerfEvent succeeds and closes the
// file descriptor it returns.
func TestCreatePerfEvent(t *testing.T) {
	perfFD, err := createPerfEvent(0, 1, false)
	if err != nil {
		t.Fatal("Can't create perf event:", err)
	}
	defer unix.Close(perfFD)
}
func TestReadRecord(t *testing.T) {
var buf bytes.Buffer
err := binary.Write(&buf, internal.NativeEndian, &perfEventHeader{})
if err != nil {
t.Fatal(err)
}
var rec Record
err = readRecord(&buf, &rec, make([]byte, perfEventHeaderSize), false)
if !IsUnknownEvent(err) {
t.Error("readRecord should return unknown event error, got", err)
}
}
// TestPause checks that Pause stops the delivery of samples, that Resume
// restarts it, that both calls are idempotent and that both report a wrapped
// ErrClosed once the reader has been closed.
func TestPause(t *testing.T) {
	t.Parallel()

	events := perfEventArray(t)
	rd, err := NewReader(events, 4096)
	if err != nil {
		t.Fatal(err)
	}
	defer rd.Close()

	// Reader is already unpaused by default. It should be idempotent.
	if err = rd.Resume(); err != nil {
		t.Fatal(err)
	}

	// Write a sample. The reader should read it.
	prog := outputSamplesProg(t, events, 5)
	ret, _, err := prog.Test(internal.EmptyBPFContext)
	testutils.SkipIfNotSupported(t, err)
	if err != nil || ret != 0 {
		// Report both values, either of them may be the cause.
		t.Fatalf("Can't write sample: ret=%v err=%v", syscall.Errno(-int32(ret)), err)
	}
	if _, err := rd.Read(); err != nil {
		t.Fatal(err)
	}

	// Pause. No notification should trigger.
	if err = rd.Pause(); err != nil {
		t.Fatal(err)
	}
	errChan := make(chan error, 1)
	go func() {
		// Read one notification then send any errors and exit.
		_, err := rd.Read()
		errChan <- err
	}()
	ret, _, err = prog.Test(internal.EmptyBPFContext)
	if err == nil && ret == 0 {
		t.Fatal("Unexpectedly wrote sample while paused")
	} // else Success
	select {
	case err := <-errChan:
		// Failure: Pause was unsuccessful.
		// err is nil if a sample was delivered, so format it with %v.
		t.Fatalf("received notification on paused reader: %v", err)
	case <-time.After(readTimeout):
		// Success
	}

	// Pause should be idempotent.
	if err = rd.Pause(); err != nil {
		t.Fatal(err)
	}

	// Resume. Now notifications should continue.
	if err = rd.Resume(); err != nil {
		t.Fatal(err)
	}
	ret, _, err = prog.Test(internal.EmptyBPFContext)
	if err != nil || ret != 0 {
		t.Fatalf("Can't write sample: ret=%v err=%v", syscall.Errno(-int32(ret)), err)
	}
	// The goroutine started while paused is still blocked in Read and picks
	// up this sample.
	select {
	case err := <-errChan:
		if err != nil {
			t.Fatal(err)
		} // else Success
	case <-time.After(readTimeout):
		t.Fatal("timed out waiting for notification after resume")
	}

	if err = rd.Close(); err != nil {
		t.Fatal(err)
	}

	// Pause/Resume after close should be no-op.
	err = rd.Pause()
	qt.Assert(t, err, qt.Not(qt.Equals), ErrClosed, qt.Commentf("returns unwrapped ErrClosed"))
	qt.Assert(t, errors.Is(err, ErrClosed), qt.IsTrue, qt.Commentf("doesn't wrap ErrClosed"))
	err = rd.Resume()
	qt.Assert(t, err, qt.Not(qt.Equals), ErrClosed, qt.Commentf("returns unwrapped ErrClosed"))
	qt.Assert(t, errors.Is(err, ErrClosed), qt.IsTrue, qt.Commentf("doesn't wrap ErrClosed"))
}
// BenchmarkReader measures submitting one 80 byte sample and reading it back
// with Read, once per iteration.
func BenchmarkReader(b *testing.B) {
	array := perfEventArray(b)
	prog := outputSamplesProg(b, array, 80)

	reader, err := NewReader(array, 4096)
	if err != nil {
		b.Fatal(err)
	}
	defer reader.Close()

	ctx := internal.EmptyBPFContext

	b.ResetTimer()
	b.ReportAllocs()
	for n := 0; n < b.N; n++ {
		ret, _, err := prog.Test(ctx)
		if err != nil {
			b.Fatal(err)
		}
		if errno := syscall.Errno(-int32(ret)); errno != 0 {
			b.Fatal("Expected 0 as return value, got", errno)
		}

		if _, err := reader.Read(); err != nil {
			b.Fatal(err)
		}
	}
}
// BenchmarkReadInto measures submitting one 80 byte sample and reading it
// back with ReadInto, reusing a single Record across iterations.
func BenchmarkReadInto(b *testing.B) {
	array := perfEventArray(b)
	prog := outputSamplesProg(b, array, 80)

	reader, err := NewReader(array, 4096)
	if err != nil {
		b.Fatal(err)
	}
	defer reader.Close()

	ctx := internal.EmptyBPFContext

	b.ResetTimer()
	b.ReportAllocs()

	var rec Record
	for n := 0; n < b.N; n++ {
		// NB: Submitting samples into the perf event ring dominates
		// the benchmark time unfortunately.
		ret, _, err := prog.Test(ctx)
		if err != nil {
			b.Fatal(err)
		}
		if errno := syscall.Errno(-int32(ret)); errno != 0 {
			b.Fatal("Expected 0 as return value, got", errno)
		}

		if err := reader.ReadInto(&rec); err != nil {
			b.Fatal(err)
		}
	}
}
// bpfPerfEventOutputProgram is a placeholder that exists just to make the
// examples below nicer. It always returns a nil program and a nil map.
func bpfPerfEventOutputProgram() (*ebpf.Program, *ebpf.Map) {
return nil, nil
}
// ExampleReader submits a perf event using BPF,
// and then reads it in user space.
//
// The BPF will look something like this:
//
//	struct map events __section("maps") = {
//	  .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
//	};
//
//	__section("xdp") int output_single(void *ctx) {
//	  unsigned char buf[] = {
//	    1, 2, 3, 4, 5
//	  };
//
//	   return perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &buf[0], 5);
//	 }
//
// Also see BPF_F_CTXLEN_MASK if you want to sample packet data
// from SKB or XDP programs.
func ExampleReader() {
	prog, events := bpfPerfEventOutputProgram()
	defer prog.Close()
	defer events.Close()

	reader, err := NewReader(events, 4096)
	if err != nil {
		panic(err)
	}
	defer reader.Close()

	// Writes out a sample with content 1,2,3,4,5
	if ret, _, err := prog.Test(internal.EmptyBPFContext); err != nil || ret != 0 {
		panic("Can't write sample")
	}

	rec, err := reader.Read()
	if err != nil {
		panic(err)
	}

	// Data is padded with 0 for alignment
	fmt.Println("Sample:", rec.RawSample)
}
// ReadInto allows reducing memory allocations.
func ExampleReader_ReadInto() {
	prog, events := bpfPerfEventOutputProgram()
	defer prog.Close()
	defer events.Close()

	reader, err := NewReader(events, 4096)
	if err != nil {
		panic(err)
	}
	defer reader.Close()

	const samples = 2

	// Write out two samples
	for n := 0; n < samples; n++ {
		if ret, _, err := prog.Test(internal.EmptyBPFContext); err != nil || ret != 0 {
			panic("Can't write sample")
		}
	}

	// The same Record is handed to every ReadInto call.
	var rec Record
	for n := 0; n < samples; n++ {
		err := reader.ReadInto(&rec)
		if err != nil {
			panic(err)
		}

		fmt.Println("Sample:", rec.RawSample[:5])
	}
}
// perfEventArray creates a PerfEventArray map for use in a test or benchmark.
//
// The test fails immediately if the map can't be created. The map is closed
// automatically when tb finishes.
func perfEventArray(tb testing.TB) *ebpf.Map {
	// Attribute failures to the calling test rather than to this helper,
	// matching outputSamplesProg and checkRecord.
	tb.Helper()

	events, err := ebpf.NewMap(&ebpf.MapSpec{
		Type: ebpf.PerfEventArray,
	})
	if err != nil {
		tb.Fatal(err)
	}
	tb.Cleanup(func() { events.Close() })

	return events
}