package perf import ( "bytes" "encoding/binary" "errors" "fmt" "math" "os" "syscall" "testing" "time" "github.com/cilium/ebpf" "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal/testutils" "github.com/cilium/ebpf/internal/testutils/fdtrace" "github.com/cilium/ebpf/internal/unix" qt "github.com/frankban/quicktest" ) var ( readTimeout = 250 * time.Millisecond ) func TestMain(m *testing.M) { fdtrace.TestMain(m) } func TestPerfReader(t *testing.T) { events := perfEventArray(t) rd, err := NewReader(events, 4096) if err != nil { t.Fatal(err) } defer rd.Close() outputSamples(t, events, 5) checkRecord(t, rd) rd.SetDeadline(time.Now().Add(4 * time.Millisecond)) _, err = rd.Read() qt.Assert(t, errors.Is(err, os.ErrDeadlineExceeded), qt.IsTrue, qt.Commentf("expected os.ErrDeadlineExceeded")) } func TestReaderSetDeadline(t *testing.T) { events := perfEventArray(t) rd, err := NewReader(events, 4096) if err != nil { t.Fatal(err) } defer rd.Close() rd.SetDeadline(time.Now().Add(-time.Second)) if _, err := rd.Read(); !errors.Is(err, os.ErrDeadlineExceeded) { t.Error("Expected os.ErrDeadlineExceeded from first Read, got:", err) } if _, err := rd.Read(); !errors.Is(err, os.ErrDeadlineExceeded) { t.Error("Expected os.ErrDeadlineExceeded from second Read, got:", err) } } func outputSamples(tb testing.TB, events *ebpf.Map, sampleSizes ...byte) { prog := outputSamplesProg(tb, events, sampleSizes...) ret, _, err := prog.Test(internal.EmptyBPFContext) testutils.SkipIfNotSupported(tb, err) if err != nil { tb.Fatal(err) } if errno := syscall.Errno(-int32(ret)); errno != 0 { tb.Fatal("Expected 0 as return value, got", errno) } } // outputSamplesProg creates a program which submits a series of samples to a PerfEventArray. // // The format of each sample is: // // index: 0 1 2 3 ... size - 1 // content: size id 0xff 0xff ... 0xff [padding] // // padding is an implementation detail of the perf buffer and 1-7 bytes long. The // contents are undefined. func outputSamplesProg(tb testing.TB, events *ebpf.Map, sampleSizes ...byte) *ebpf.Program { tb.Helper() // Requires at least 4.9 (0515e5999a46 "bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type") testutils.SkipOnOldKernel(tb, "4.9", "perf events support") const bpfFCurrentCPU = 0xffffffff var maxSampleSize byte for _, sampleSize := range sampleSizes { if sampleSize < 2 { tb.Fatalf("Sample size %d is too small to contain size and counter", sampleSize) } if sampleSize > maxSampleSize { maxSampleSize = sampleSize } } // Fill a buffer on the stack, and stash context somewhere insns := asm.Instructions{ asm.LoadImm(asm.R0, ^int64(0), asm.DWord), asm.Mov.Reg(asm.R9, asm.R1), } bufDwords := int(maxSampleSize/8) + 1 for i := 0; i < bufDwords; i++ { insns = append(insns, asm.StoreMem(asm.RFP, int16(i+1)*-8, asm.R0, asm.DWord), ) } for i, sampleSize := range sampleSizes { insns = append(insns, // Restore stashed context. asm.Mov.Reg(asm.R1, asm.R9), // map asm.LoadMapPtr(asm.R2, events.FD()), // flags asm.LoadImm(asm.R3, bpfFCurrentCPU, asm.DWord), // buffer asm.Mov.Reg(asm.R4, asm.RFP), asm.Add.Imm(asm.R4, int32(bufDwords*-8)), // buffer[0] = size asm.StoreImm(asm.R4, 0, int64(sampleSize), asm.Byte), // buffer[1] = i asm.StoreImm(asm.R4, 1, int64(i&math.MaxUint8), asm.Byte), // size asm.Mov.Imm(asm.R5, int32(sampleSize)), asm.FnPerfEventOutput.Call(), ) } insns = append(insns, asm.Return()) prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ License: "GPL", Type: ebpf.XDP, Instructions: insns, }) if err != nil { tb.Fatal(err) } tb.Cleanup(func() { prog.Close() }) return prog } func checkRecord(tb testing.TB, rd *Reader) (id int) { tb.Helper() rec, err := rd.Read() qt.Assert(tb, err, qt.IsNil) qt.Assert(tb, rec.CPU >= 0, qt.IsTrue, qt.Commentf("Record has invalid CPU number")) size := int(rec.RawSample[0]) qt.Assert(tb, len(rec.RawSample) >= size, qt.IsTrue, qt.Commentf("RawSample is at least size bytes")) for i, v := range rec.RawSample[2:size] { qt.Assert(tb, v, qt.Equals, byte(0xff), qt.Commentf("filler at position %d should match", i+2)) } // padding is ignored since it's value is undefined. return int(rec.RawSample[1]) } func TestPerfReaderLostSample(t *testing.T) { // To generate a lost sample perf record: // // 1. Fill the perf ring buffer almost completely, with the output_large program. // The buffer is sized in number of pages, which are architecture dependant. // // 2. Write an extra event that doesn't fit in the space remaining. // // 3. Write a smaller event that does fit, with output_single program. // Lost sample records are generated opportunistically, when the kernel // is writing an event and realizes that there were events lost previously. // // The event size is hardcoded in the test BPF programs, there's no way // to parametrize it without rebuilding the programs. // // The event size needs to be selected so that, for any page size, there are at least // 48 bytes left in the perf ring page after filling it with a whole number of events: // // - PERF_RECORD_LOST: 8 (perf_event_header) + 16 (PERF_RECORD_LOST) // // - output_single: 8 (perf_event_header) + 4 (size) + 5 (payload) + 7 (padding to 64bits) // // By selecting an event size of the form 2^n + 2^(n+1), for any page size 2^(n+m), m >= 0, // the number of bytes left, x, after filling a page with a whole number of events is: // // 2^(n+m) 2^n * 2^m // x = 2^n * frac(---------------) <=> x = 2^n * frac(---------------) // 2^n + 2^(n+1) 2^n + 2^n * 2 // // 2^n * 2^m // <=> x = 2^n * frac(---------------) // 2^n * (1 + 2) // // 2^m // <=> x = 2^n * frac(-----) // 3 // // 1 2 // <=> x = 2^n * - or x = 2^n * - // 3 3 // // Selecting n = 6, we have: // // x = 64 or x = 128, no matter the page size 2^(6+m) // // event size = 2^6 + 2^7 = 192 // // Accounting for perf headers, output_large uses a 180 byte payload: // // 8 (perf_event_header) + 4 (size) + 180 (payload) const ( eventSize = 192 ) var ( pageSize = os.Getpagesize() maxEvents = (pageSize / eventSize) ) if remainder := pageSize % eventSize; remainder != 64 && remainder != 128 { // Page size isn't 2^(6+m), m >= 0 t.Fatal("unsupported page size:", pageSize) } var sampleSizes []byte // Fill the ring with the maximum number of output_large events that will fit, // and generate a lost event by writing an additional event. for i := 0; i < maxEvents+1; i++ { sampleSizes = append(sampleSizes, 180) } // Generate a small event to trigger the lost record sampleSizes = append(sampleSizes, 5) events := perfEventArray(t) rd, err := NewReader(events, pageSize) if err != nil { t.Fatal(err) } defer rd.Close() outputSamples(t, events, sampleSizes...) for range sampleSizes { record, err := rd.Read() if err != nil { t.Fatal(err) } if record.RawSample == nil && record.LostSamples != 1 { t.Fatal("Expected a record with LostSamples 1, got", record.LostSamples) } } } func TestPerfReaderOverwritable(t *testing.T) { // Smallest buffer size. pageSize := os.Getpagesize() const sampleSize = math.MaxUint8 // Account for perf header (8) and size (4), align to 8 bytes as perf does. realSampleSize := internal.Align(sampleSize+8+4, 8) maxEvents := pageSize / realSampleSize var sampleSizes []byte for i := 0; i < maxEvents; i++ { sampleSizes = append(sampleSizes, sampleSize) } // Append an extra sample that will overwrite the first sample. sampleSizes = append(sampleSizes, sampleSize) events := perfEventArray(t) rd, err := NewReaderWithOptions(events, pageSize, ReaderOptions{Overwritable: true}) if err != nil { t.Fatal(err) } defer rd.Close() _, err = rd.Read() qt.Assert(t, err, qt.ErrorIs, errMustBePaused) outputSamples(t, events, sampleSizes...) qt.Assert(t, rd.Pause(), qt.IsNil) rd.SetDeadline(time.Now()) nextID := maxEvents for i := 0; i < maxEvents; i++ { id := checkRecord(t, rd) qt.Assert(t, id, qt.Equals, nextID) nextID-- } } func TestPerfReaderOverwritableEmpty(t *testing.T) { events := perfEventArray(t) rd, err := NewReaderWithOptions(events, os.Getpagesize(), ReaderOptions{Overwritable: true}) if err != nil { t.Fatal(err) } defer rd.Close() err = rd.Pause() if err != nil { t.Fatal(err) } rd.SetDeadline(time.Now().Add(4 * time.Millisecond)) _, err = rd.Read() qt.Assert(t, errors.Is(err, os.ErrDeadlineExceeded), qt.IsTrue, qt.Commentf("expected os.ErrDeadlineExceeded")) err = rd.Resume() if err != nil { t.Fatal(err) } } func TestPerfReaderClose(t *testing.T) { events := perfEventArray(t) rd, err := NewReader(events, 4096) if err != nil { t.Fatal(err) } defer rd.Close() errs := make(chan error, 1) waiting := make(chan struct{}) go func() { close(waiting) _, err := rd.Read() errs <- err }() <-waiting // Close should interrupt Read if err := rd.Close(); err != nil { t.Fatal(err) } select { case <-errs: case <-time.After(time.Second): t.Fatal("Close doesn't interrupt Read") } // And we should be able to call it multiple times if err := rd.Close(); err != nil { t.Fatal(err) } if _, err := rd.Read(); err == nil { t.Fatal("Read on a closed PerfReader doesn't return an error") } } func TestCreatePerfEvent(t *testing.T) { fd, err := createPerfEvent(0, 1, false) if err != nil { t.Fatal("Can't create perf event:", err) } unix.Close(fd) } func TestReadRecord(t *testing.T) { var buf bytes.Buffer err := binary.Write(&buf, internal.NativeEndian, &perfEventHeader{}) if err != nil { t.Fatal(err) } var rec Record err = readRecord(&buf, &rec, make([]byte, perfEventHeaderSize), false) if !IsUnknownEvent(err) { t.Error("readRecord should return unknown event error, got", err) } } func TestPause(t *testing.T) { t.Parallel() events := perfEventArray(t) rd, err := NewReader(events, 4096) if err != nil { t.Fatal(err) } defer rd.Close() // Reader is already unpaused by default. It should be idempotent. if err = rd.Resume(); err != nil { t.Fatal(err) } // Write a sample. The reader should read it. prog := outputSamplesProg(t, events, 5) ret, _, err := prog.Test(internal.EmptyBPFContext) testutils.SkipIfNotSupported(t, err) if err != nil || ret != 0 { t.Fatal("Can't write sample") } if _, err := rd.Read(); err != nil { t.Fatal(err) } // Pause. No notification should trigger. if err = rd.Pause(); err != nil { t.Fatal(err) } errChan := make(chan error, 1) go func() { // Read one notification then send any errors and exit. _, err := rd.Read() errChan <- err }() ret, _, err = prog.Test(internal.EmptyBPFContext) if err == nil && ret == 0 { t.Fatal("Unexpectedly wrote sample while paused") } // else Success select { case err := <-errChan: // Failure: Pause was unsuccessful. t.Fatalf("received notification on paused reader: %s", err) case <-time.After(readTimeout): // Success } // Pause should be idempotent. if err = rd.Pause(); err != nil { t.Fatal(err) } // Resume. Now notifications should continue. if err = rd.Resume(); err != nil { t.Fatal(err) } ret, _, err = prog.Test(internal.EmptyBPFContext) if err != nil || ret != 0 { t.Fatal("Can't write sample") } select { case err := <-errChan: if err != nil { t.Fatal(err) } // else Success case <-time.After(readTimeout): t.Fatal("timed out waiting for notification after resume") } if err = rd.Close(); err != nil { t.Fatal(err) } // Pause/Resume after close should be no-op. err = rd.Pause() qt.Assert(t, err, qt.Not(qt.Equals), ErrClosed, qt.Commentf("returns unwrapped ErrClosed")) qt.Assert(t, errors.Is(err, ErrClosed), qt.IsTrue, qt.Commentf("doesn't wrap ErrClosed")) err = rd.Resume() qt.Assert(t, err, qt.Not(qt.Equals), ErrClosed, qt.Commentf("returns unwrapped ErrClosed")) qt.Assert(t, errors.Is(err, ErrClosed), qt.IsTrue, qt.Commentf("doesn't wrap ErrClosed")) } func BenchmarkReader(b *testing.B) { events := perfEventArray(b) prog := outputSamplesProg(b, events, 80) rd, err := NewReader(events, 4096) if err != nil { b.Fatal(err) } defer rd.Close() buf := internal.EmptyBPFContext b.ResetTimer() b.ReportAllocs() for i := 0; i < b.N; i++ { ret, _, err := prog.Test(buf) if err != nil { b.Fatal(err) } else if errno := syscall.Errno(-int32(ret)); errno != 0 { b.Fatal("Expected 0 as return value, got", errno) } if _, err = rd.Read(); err != nil { b.Fatal(err) } } } func BenchmarkReadInto(b *testing.B) { events := perfEventArray(b) prog := outputSamplesProg(b, events, 80) rd, err := NewReader(events, 4096) if err != nil { b.Fatal(err) } defer rd.Close() buf := internal.EmptyBPFContext b.ResetTimer() b.ReportAllocs() var rec Record for i := 0; i < b.N; i++ { // NB: Submitting samples into the perf event ring dominates // the benchmark time unfortunately. ret, _, err := prog.Test(buf) if err != nil { b.Fatal(err) } else if errno := syscall.Errno(-int32(ret)); errno != 0 { b.Fatal("Expected 0 as return value, got", errno) } if err := rd.ReadInto(&rec); err != nil { b.Fatal(err) } } } // This exists just to make the example below nicer. func bpfPerfEventOutputProgram() (*ebpf.Program, *ebpf.Map) { return nil, nil } // ExamplePerfReader submits a perf event using BPF, // and then reads it in user space. // // The BPF will look something like this: // // struct map events __section("maps") = { // .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, // }; // // __section("xdp") int output_single(void *ctx) { // unsigned char buf[] = { // 1, 2, 3, 4, 5 // }; // // return perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &buf[0], 5); // } // // Also see BPF_F_CTXLEN_MASK if you want to sample packet data // from SKB or XDP programs. func ExampleReader() { prog, events := bpfPerfEventOutputProgram() defer prog.Close() defer events.Close() rd, err := NewReader(events, 4096) if err != nil { panic(err) } defer rd.Close() // Writes out a sample with content 1,2,3,4,4 ret, _, err := prog.Test(internal.EmptyBPFContext) if err != nil || ret != 0 { panic("Can't write sample") } record, err := rd.Read() if err != nil { panic(err) } // Data is padded with 0 for alignment fmt.Println("Sample:", record.RawSample) } // ReadRecord allows reducing memory allocations. func ExampleReader_ReadInto() { prog, events := bpfPerfEventOutputProgram() defer prog.Close() defer events.Close() rd, err := NewReader(events, 4096) if err != nil { panic(err) } defer rd.Close() for i := 0; i < 2; i++ { // Write out two samples ret, _, err := prog.Test(internal.EmptyBPFContext) if err != nil || ret != 0 { panic("Can't write sample") } } var rec Record for i := 0; i < 2; i++ { if err := rd.ReadInto(&rec); err != nil { panic(err) } fmt.Println("Sample:", rec.RawSample[:5]) } } func perfEventArray(tb testing.TB) *ebpf.Map { events, err := ebpf.NewMap(&ebpf.MapSpec{ Type: ebpf.PerfEventArray, }) if err != nil { tb.Fatal(err) } tb.Cleanup(func() { events.Close() }) return events }