whatcanGOwrong

This commit is contained in:
2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1,6 @@
// Package sys contains bindings for the BPF syscall.
package sys
// Regenerate types.go by invoking go generate in the current directory.
//go:generate go run github.com/cilium/ebpf/internal/cmd/gentypes ../../btf/testdata/vmlinux.btf.gz
@@ -0,0 +1,133 @@
package sys
import (
"fmt"
"math"
"os"
"runtime"
"strconv"
"github.com/cilium/ebpf/internal/unix"
)
// ErrClosedFd is returned by Dup when the FD has already been closed or
// disowned. It has the same value as unix.EBADF.
var ErrClosedFd = unix.EBADF
// FD wraps a raw file descriptor number.
//
// raw is set to -1 once the descriptor has been closed or handed off; the
// methods of FD treat any negative value as "no longer valid".
type FD struct {
	raw int
}
// newFD wraps value in an FD and installs a finalizer on it.
//
// While leak tracing is enabled (onLeakFD is set), the creation stack is
// recorded in fds under the fd number. Finding a stack that is already
// recorded for the same number causes a panic.
func newFD(value int) *FD {
	if tracing := onLeakFD != nil; tracing {
		// callersFrames is invoked directly from here so that the recorded stack
		// begins at newFD.
		prev, loaded := fds.LoadOrStore(value, callersFrames())
		if loaded {
			panic(fmt.Sprintf("found existing stack for fd %d:\n%s", value, FormatFrames(prev.(*runtime.Frames))))
		}
	}

	wrapped := &FD{raw: value}
	runtime.SetFinalizer(wrapped, (*FD).finalize)
	return wrapped
}
// finalize is installed as the runtime finalizer of an FD. It reports the FD
// to the leak callback, if one is set and a stack was recorded for it, and
// then closes the descriptor.
func (fd *FD) finalize() {
	// Already closed or disowned: nothing left to release.
	if fd.raw < 0 {
		return
	}

	// LoadAndDelete removes the entry while reading it, so the callback fires
	// at most once per sys.FD allocation. This matters because a runtime.Frames
	// can only be unwound once.
	if frames, tracked := fds.LoadAndDelete(fd.Int()); tracked && onLeakFD != nil {
		onLeakFD(frames.(*runtime.Frames))
	}

	// A finalizer has nowhere to report a close error to.
	_ = fd.Close()
}
// NewFD wraps a raw fd with a finalizer.
//
// The raw fd must not be used after this call: the BPF UAPI assumes that zero
// is not a valid fd value, so a zero fd is duplicated to a non-zero number and
// the original is closed. Negative values are rejected with an error.
func NewFD(value int) (*FD, error) {
	if value < 0 {
		return nil, fmt.Errorf("invalid fd %d", value)
	}

	fd := newFD(value)
	if value == 0 {
		// Move the descriptor off zero, then release the original regardless of
		// whether the duplication succeeded.
		dup, err := fd.Dup()
		_ = fd.Close()
		return dup, err
	}

	return fd, nil
}
// String returns the fd number formatted as a base-10 string.
func (fd *FD) String() string {
	return strconv.Itoa(fd.raw)
}
// Int returns the raw fd number. The value is negative once the FD has been
// closed or disowned.
func (fd *FD) Int() int {
	return fd.raw
}
// Uint returns the fd number as a uint32.
//
// Values that do not fit (negative, or larger than math.MaxUint32) are
// reported as math.MaxUint32.
func (fd *FD) Uint() uint32 {
	if fd.raw >= 0 && int64(fd.raw) <= math.MaxUint32 {
		return uint32(fd.raw)
	}

	// Best effort: this is the number most likely to be an invalid file
	// descriptor. It is equal to -1 (on two's complement arches).
	return math.MaxUint32
}
// Close closes the underlying file descriptor.
//
// Calling Close on an FD that is already closed or disowned does nothing and
// returns nil.
func (fd *FD) Close() error {
	if fd.raw >= 0 {
		return unix.Close(fd.disown())
	}
	return nil
}
// disown detaches the descriptor from fd and returns its raw value.
//
// The leak-tracing entry is removed, fd is marked invalid (raw = -1) and the
// finalizer is cleared. The caller becomes responsible for the returned fd.
func (fd *FD) disown() int {
	raw := fd.raw
	fds.Delete(raw)
	fd.raw = -1

	runtime.SetFinalizer(fd, nil)
	return raw
}
// Dup duplicates the file descriptor and returns the copy wrapped in a new FD.
//
// The duplicate is requested with F_DUPFD_CLOEXEC and a minimum value of 1.
// Returns ErrClosedFd if fd has already been closed or disowned. A failure of
// the fcntl call is wrapped with %w, so callers can inspect the underlying
// error with errors.Is or errors.As.
func (fd *FD) Dup() (*FD, error) {
	if fd.raw < 0 {
		return nil, ErrClosedFd
	}

	// Always require the fd to be larger than zero: the BPF API treats the value
	// as "no argument provided".
	dup, err := unix.FcntlInt(uintptr(fd.raw), unix.F_DUPFD_CLOEXEC, 1)
	if err != nil {
		return nil, fmt.Errorf("can't dup fd: %w", err)
	}

	return newFD(dup), nil
}
// File hands the descriptor over to a new [*os.File] with the given name.
//
// The FD is disowned by this call and must not be used afterwards.
//
// Returns nil if the FD is not valid.
func (fd *FD) File(name string) *os.File {
	if fd.raw >= 0 {
		return os.NewFile(uintptr(fd.disown()), name)
	}
	return nil
}
@@ -0,0 +1,66 @@
package sys
import (
"os"
"syscall"
"testing"
"github.com/cilium/ebpf/internal/unix"
qt "github.com/frankban/quicktest"
)
// init rearranges the process's low file descriptors before any test runs:
// os.Stdin is re-pointed at a duplicate descriptor (numbered 1 or higher), the
// original is closed, and reserveFdZero then opens /dev/null expecting to
// receive fd 0.
func init() {
	// Free up fd 0 for TestFD.
	stdin, err := unix.FcntlInt(os.Stdin.Fd(), unix.F_DUPFD_CLOEXEC, 1)
	if err != nil {
		panic(err)
	}
	// Swap os.Stdin over to the duplicate before closing the original file
	// (presumably fd 0).
	old := os.Stdin
	os.Stdin = os.NewFile(uintptr(stdin), "stdin")
	old.Close()
	// Occupy fd 0 again so that it is in a known state when the tests start.
	reserveFdZero()
}
// reserveFdZero opens /dev/null read-only and panics unless the descriptor
// returned by the kernel is 0. The descriptor is intentionally left open.
func reserveFdZero() {
	fd, err := unix.Open(os.DevNull, syscall.O_RDONLY, 0)
	if err != nil {
		panic(err)
	}

	if fd != 0 {
		// err is nil on this path, so panic(err) would carry no information.
		// Panic with an explicit message instead.
		panic("reserveFdZero: open did not return fd 0")
	}
}
// TestFD checks that NewFD rejects negative values and that wrapping fd 0
// yields an FD with a non-zero value while fd 0 itself ends up closed.
//
// It relies on fd 0 being open on entry (see reserveFdZero) and re-opens it
// before returning.
func TestFD(t *testing.T) {
	_, err := NewFD(-1)
	qt.Assert(t, err, qt.IsNotNil, qt.Commentf("negative fd should be rejected"))
	// Wrapping fd 0 must produce a duplicate with a different number.
	fd, err := NewFD(0)
	qt.Assert(t, err, qt.IsNil)
	qt.Assert(t, fd.Int(), qt.Not(qt.Equals), 0, qt.Commentf("fd value should not be zero"))
	// NewFD closes the original after duplicating it, so fd 0 must now be
	// invalid.
	var stat unix.Stat_t
	err = unix.Fstat(0, &stat)
	qt.Assert(t, err, qt.ErrorIs, unix.EBADF, qt.Commentf("zero fd should be closed"))
	// Re-occupy fd 0 for whatever runs next.
	reserveFdZero()
}
// TestFDFile checks that File transfers ownership of the descriptor: the
// first call returns a usable *os.File, after which the FD behaves as closed.
func TestFDFile(t *testing.T) {
	fd := newFD(openFd(t))

	// The first call hands the descriptor to the *os.File.
	f := fd.File("test")
	qt.Assert(t, f, qt.IsNotNil)
	qt.Assert(t, f.Close(), qt.IsNil)

	// The FD is disowned now, so both File and Dup must refuse to work.
	again := fd.File("closed")
	qt.Assert(t, again, qt.IsNil)

	_, dupErr := fd.Dup()
	qt.Assert(t, dupErr, qt.ErrorIs, ErrClosedFd)
}
// openFd opens /dev/null read-only and returns the raw descriptor. The test
// is failed through tb if the open does not succeed.
func openFd(tb testing.TB) int {
	raw, openErr := unix.Open(os.DevNull, syscall.O_RDONLY, 0)
	qt.Assert(tb, openErr, qt.IsNil)
	return raw
}
@@ -0,0 +1,93 @@
package sys
import (
"bytes"
"fmt"
"runtime"
"sync"
)
// OnLeakFD controls tracing of [FD] lifetimes, to detect resources that are
// not closed by Close().
//
// A non-nil fn enables tracing for all FDs created from then on. fn is invoked
// for every FD that ends up being closed by the garbage collector rather than
// by an explicit Close(). Enabling tracing while it is already enabled (two
// calls with a non-nil fn and no disabling call in between) panics.
//
// A nil fn disables tracing. Every FD that is still recorded at that point is
// considered leaked and the previously installed fn is invoked for it. What
// happens afterwards (for example terminating the process) is up to fn.
// Disabling tracing when it is not enabled does nothing.
//
// fn is invoked at most once per sys.FD allocation, since a runtime.Frames can
// only be unwound once.
func OnLeakFD(fn func(*runtime.Frames)) {
	switch {
	case fn != nil && onLeakFD != nil:
		panic("OnLeakFD called twice with non-nil fn")

	case fn != nil:
		// Enable leak tracing with the new callback.
		onLeakFD = fn

	case onLeakFD != nil:
		// fn is nil and tracing is active: report everything still open, then
		// switch tracing off.
		for _, frames := range flushFrames() {
			onLeakFD(frames)
		}
		onLeakFD = nil
	}
}
// onLeakFD is the leak callback installed through OnLeakFD. A nil value means
// leak tracing is disabled.
var onLeakFD func(*runtime.Frames)
// fds is a registry of all file descriptors wrapped into a sys.FD that were
// created while an fd tracer was active. It maps the fd number to the stack
// captured when the FD was created.
var fds sync.Map // map[int]*runtime.Frames
// flushFrames empties fds and returns the stacks it held as a slice. Each
// entry is removed as it is collected, because a runtime.Frames can only be
// unwound once using Next() and must therefore not be handed out twice.
func flushFrames() []*runtime.Frames {
	var drained []*runtime.Frames

	collect := func(fd, stack any) bool {
		drained = append(drained, stack.(*runtime.Frames))
		fds.Delete(fd)
		// Keep iterating until every entry has been visited.
		return true
	}
	fds.Range(collect)

	return drained
}
// callersFrames captures the call stack of its caller, up to 32 program
// counters deep, and returns it as a runtime.Frames. It returns nil if no
// program counters could be collected.
func callersFrames() *runtime.Frames {
	pcs := make([]uintptr, 32)

	// Skip runtime.Callers and this function.
	n := runtime.Callers(2, pcs)
	if n == 0 {
		return nil
	}

	// Only the first n entries were filled in by runtime.Callers; the rest of
	// the buffer is zero and is not part of the stack.
	return runtime.CallersFrames(pcs[:n])
}
// FormatFrames renders a runtime.Frames as human-readable text.
//
// Each frame produces two lines: the function name with the offset of the
// program counter from the function entry, followed by the file and line
// number. fs is consumed in the process. At least one frame is always written,
// even when fs has nothing to report.
func FormatFrames(fs *runtime.Frames) string {
	var out bytes.Buffer

	for more := true; more; {
		var frame runtime.Frame
		frame, more = fs.Next()
		fmt.Fprintf(&out, "\t%s+%#x\n\t\t%s:%d\n", frame.Function, frame.PC-frame.Entry, frame.File, frame.Line)
	}

	return out.String()
}
@@ -0,0 +1,49 @@
// Code generated by "stringer -type MapFlags"; DO NOT EDIT.
package sys
import "strconv"
// _ is never called. It exists so that the compiler verifies, at build time,
// that every MapFlags constant still has the value the generated name table
// below was built for.
func _() {
	// An "invalid array index" compiler error signifies that the constant values have changed.
	// Re-run the stringer command to generate them again.
	var x [1]struct{}
	_ = x[BPF_F_NO_PREALLOC-1]
	_ = x[BPF_F_NO_COMMON_LRU-2]
	_ = x[BPF_F_NUMA_NODE-4]
	_ = x[BPF_F_RDONLY-8]
	_ = x[BPF_F_WRONLY-16]
	_ = x[BPF_F_STACK_BUILD_ID-32]
	_ = x[BPF_F_ZERO_SEED-64]
	_ = x[BPF_F_RDONLY_PROG-128]
	_ = x[BPF_F_WRONLY_PROG-256]
	_ = x[BPF_F_CLONE-512]
	_ = x[BPF_F_MMAPABLE-1024]
	_ = x[BPF_F_PRESERVE_ELEMS-2048]
	_ = x[BPF_F_INNER_MAP-4096]
}
// _MapFlags_name holds the names of all MapFlags constants concatenated into
// a single string; _MapFlags_map slices it per value.
const _MapFlags_name = "BPF_F_NO_PREALLOCBPF_F_NO_COMMON_LRUBPF_F_NUMA_NODEBPF_F_RDONLYBPF_F_WRONLYBPF_F_STACK_BUILD_IDBPF_F_ZERO_SEEDBPF_F_RDONLY_PROGBPF_F_WRONLY_PROGBPF_F_CLONEBPF_F_MMAPABLEBPF_F_PRESERVE_ELEMSBPF_F_INNER_MAP"

// _MapFlags_map maps each known MapFlags value to its name, expressed as a
// sub-slice of _MapFlags_name.
var _MapFlags_map = map[MapFlags]string{
	1:    _MapFlags_name[0:17],
	2:    _MapFlags_name[17:36],
	4:    _MapFlags_name[36:51],
	8:    _MapFlags_name[51:63],
	16:   _MapFlags_name[63:75],
	32:   _MapFlags_name[75:95],
	64:   _MapFlags_name[95:110],
	128:  _MapFlags_name[110:127],
	256:  _MapFlags_name[127:144],
	512:  _MapFlags_name[144:155],
	1024: _MapFlags_name[155:169],
	2048: _MapFlags_name[169:189],
	4096: _MapFlags_name[189:204],
}
// String returns the constant's name when i is exactly one of the known
// MapFlags values, and "MapFlags(N)" with N in base 10 otherwise. Combinations
// of several flags are not decomposed.
func (i MapFlags) String() string {
	if str, ok := _MapFlags_map[i]; ok {
		return str
	}
	return "MapFlags(" + strconv.FormatInt(int64(i), 10) + ")"
}
@@ -0,0 +1,52 @@
package sys
import (
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
// NewPointer creates a 64-bit pointer from an unsafe Pointer.
//
// ptr is stored as-is; a nil ptr yields the zero Pointer.
func NewPointer(ptr unsafe.Pointer) Pointer {
	return Pointer{ptr: ptr}
}
// NewSlicePointer creates a 64-bit pointer to the first byte of buf.
//
// An empty (or nil) slice yields the zero Pointer.
func NewSlicePointer(buf []byte) Pointer {
	if len(buf) > 0 {
		return Pointer{ptr: unsafe.Pointer(&buf[0])}
	}
	return Pointer{}
}
// NewSlicePointerLen creates a 64-bit pointer from a byte slice and also
// returns the slice's length as a uint32.
//
// Useful to assign both the pointer and the length in one go.
func NewSlicePointerLen(buf []byte) (Pointer, uint32) {
	ptr := NewSlicePointer(buf)
	size := uint32(len(buf))
	return ptr, size
}
// NewStringPointer creates a 64-bit pointer from a string.
//
// The string is converted with unix.BytePtrFromString. If that conversion
// fails, the error is dropped and the zero Pointer is returned.
func NewStringPointer(str string) Pointer {
	p, err := unix.BytePtrFromString(str)
	if err == nil {
		return Pointer{ptr: unsafe.Pointer(p)}
	}
	return Pointer{}
}
// NewStringSlicePointer allocates an array of Pointers to each string in the
// given slice of strings and returns a 64-bit pointer to the start of the
// resulting array.
//
// An empty (or nil) slice yields the zero Pointer, matching NewSlicePointer.
//
// Use this function to pass arrays of strings as syscall arguments.
func NewStringSlicePointer(strings []string) Pointer {
	// There is no first element to take the address of; indexing sp[0] below
	// would panic.
	if len(strings) == 0 {
		return Pointer{}
	}

	sp := make([]Pointer, 0, len(strings))
	for _, s := range strings {
		sp = append(sp, NewStringPointer(s))
	}

	return Pointer{ptr: unsafe.Pointer(&sp[0])}
}
@@ -0,0 +1,14 @@
//go:build armbe || mips || mips64p32
package sys
import (
"unsafe"
)
// Pointer wraps an unsafe.Pointer to be 64bit to
// conform to the syscall specification.
//
// In this variant the 32-bit padding comes before the pointer; presumably so
// that the pointer occupies the low-order half of the 64-bit field on the
// big-endian 32-bit targets named in the build constraint.
type Pointer struct {
	pad uint32
	ptr unsafe.Pointer
}
@@ -0,0 +1,14 @@
//go:build 386 || amd64p32 || arm || mipsle || mips64p32le
package sys
import (
"unsafe"
)
// Pointer wraps an unsafe.Pointer to be 64bit to
// conform to the syscall specification.
//
// In this variant the pointer comes first and is followed by 32 bits of
// padding; presumably so that the pointer occupies the low-order half of the
// 64-bit field on the little-endian 32-bit targets named in the build
// constraint.
type Pointer struct {
	ptr unsafe.Pointer
	pad uint32
}
@@ -0,0 +1,13 @@
//go:build !386 && !amd64p32 && !arm && !mipsle && !mips64p32le && !armbe && !mips && !mips64p32
package sys
import (
"unsafe"
)
// Pointer wraps an unsafe.Pointer to be 64bit to
// conform to the syscall specification.
//
// This variant carries no padding; it is selected for every target not
// covered by the other two build constraints, presumably those where
// unsafe.Pointer is already 64 bits wide.
type Pointer struct {
	ptr unsafe.Pointer
}
@@ -0,0 +1,83 @@
package sys
import (
"fmt"
"runtime"
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
// profSet is a sigset containing only SIGPROF. It is populated once in init
// and passed to PthreadSigmask when masking and unmasking the profiler signal.
var profSet unix.Sigset_t
// init sets the SIGPROF bit in profSet.
func init() {
	// See sigsetAdd for details on the implementation. Open coded here so
	// that the compiler will check the constant calculations for us.
	profSet.Val[sigprofBit/wordBits] |= 1 << (sigprofBit % wordBits)
}
// maskProfilerSignal pins the calling goroutine to its OS thread and blocks
// SIGPROF on that thread, so that pprof cannot interrupt expensive syscalls
// such as BPF_PROG_LOAD.
//
// The caller must defer unmaskProfilerSignal() to reverse the operation.
// Panics if the signal mask cannot be changed.
func maskProfilerSignal() {
	runtime.LockOSThread()

	err := unix.PthreadSigmask(unix.SIG_BLOCK, &profSet, nil)
	if err == nil {
		return
	}

	// Masking failed: drop the thread lock taken above before panicking.
	runtime.UnlockOSThread()
	panic(fmt.Errorf("masking profiler signal: %w", err))
}
// unmaskProfilerSignal unblocks SIGPROF on the current OS thread again, so it
// can once more be interrupted for profiling, and releases the goroutine from
// that thread.
//
// Panics if the signal mask cannot be changed; the thread is unlocked in that
// case as well.
func unmaskProfilerSignal() {
	// Deferred so that the unlock also happens on the panic path.
	defer runtime.UnlockOSThread()

	err := unix.PthreadSigmask(unix.SIG_UNBLOCK, &profSet, nil)
	if err == nil {
		return
	}

	panic(fmt.Errorf("unmasking profiler signal: %w", err))
}
// Compile-time constants describing where SIGPROF lives inside a
// unix.Sigset_t. They are used by init to populate profSet, and wordBits is
// also used by sigsetAdd.
const (
	// Signal is the nth bit in the bitfield.
	// Signals are numbered from 1, bits from 0, hence the -1.
	sigprofBit = int(unix.SIGPROF - 1)
	// The number of bits in one Sigset_t word.
	wordBits = int(unsafe.Sizeof(unix.Sigset_t{}.Val[0])) * 8
)
// sigsetAdd sets the bit for signal in set.
//
// An error is returned if signal is smaller than 1 or too large to be
// represented in a unix.Sigset_t.
//
// Note: the element type of Sigset_t.Val is uint32 or uint64 depending on the
// arch. This function has to work with both, so it avoids any direct
// reference to a u32 or u64 type.
func sigsetAdd(set *unix.Sigset_t, signal unix.Signal) error {
	if signal < 1 {
		return fmt.Errorf("signal %d must be larger than 0", signal)
	}

	// For amd64, runtime.sigaddset() performs the following operation:
	//   set[(signal-1)/32] |= 1 << ((uint32(signal) - 1) & 31)
	//
	// That trick depends on sigset being two u32's: a signal in the bottom 31
	// bits is written to the low word if bit 32 is low, or to the high word if
	// bit 32 is high. The arithmetic below is the word-size independent form.

	// Signals are numbered from 1, bits from 0.
	idx := int(signal - 1)

	// Index of the word within the sigset that holds the bit.
	slot := idx / wordBits
	if slot >= len(set.Val) {
		return fmt.Errorf("signal %d does not fit within unix.Sigset_t", signal)
	}

	// Position of the bit inside that word.
	offset := idx % wordBits
	set.Val[slot] |= 1 << offset

	return nil
}
@@ -0,0 +1,78 @@
package sys
import (
"runtime"
"testing"
"unsafe"
"github.com/cilium/ebpf/internal/unix"
qt "github.com/frankban/quicktest"
)
// TestSigset exercises sigsetAdd at both ends of a unix.Sigset_t and checks
// that out-of-range signals are rejected.
func TestSigset(t *testing.T) {
	const maxSignal = unix.Signal(unsafe.Sizeof(unix.Sigset_t{}) * 8)

	var (
		// A sigset word is a typed uint of 32 or 64 bits depending on the target
		// architecture, so infer its type instead of using an untyped uint.
		wordZero = unix.Sigset_t{}.Val[0]
		lastWord = len(unix.Sigset_t{}.Val) - 1

		expected, actual unix.Sigset_t
	)

	// Signal 1 is the lowest bit of the first word.
	err := sigsetAdd(&actual, 1)
	if err != nil {
		t.Fatal(err)
	}
	expected.Val[0] = 1
	if expected != actual {
		t.Fatalf("expected first word to be 0x%x, got: 0x%x", expected, actual)
	}

	// The largest signal is the highest bit of the last word.
	err = sigsetAdd(&actual, maxSignal)
	if err != nil {
		t.Fatal(err)
	}
	expected.Val[lastWord] = ^(^wordZero >> 1)
	if expected != actual {
		t.Fatalf("expected last word to be 0x%x, got: 0x%x", expected, actual)
	}

	if sigsetAdd(&actual, maxSignal+1) == nil {
		t.Fatal("expected out-of-bounds add to be rejected")
	}

	if sigsetAdd(&actual, -1) == nil {
		t.Fatal("expected negative signal to be rejected")
	}
}
// TestProfilerSignal checks that maskProfilerSignal adds SIGPROF to the
// thread's signal mask and that unmaskProfilerSignal restores the mask that
// was in place before.
func TestProfilerSignal(t *testing.T) {
	// Additional goroutine lock to make the PthreadSigmask below execute on the
	// same OS thread as the functions under test. UnlockOSThread needs to be
	// called as many times as LockOSThread to unlock the goroutine.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()
	// Record the mask before touching it. Passing a nil set only reads the
	// current mask into old.
	var old unix.Sigset_t
	if err := unix.PthreadSigmask(0, nil, &old); err != nil {
		t.Fatal("get sigmask:", err)
	}
	maskProfilerSignal()
	var have unix.Sigset_t
	if err := unix.PthreadSigmask(0, nil, &have); err != nil {
		t.Fatal("get sigmask:", err)
	}
	// Adding SIGPROF to a copy of the current mask must not change it, which
	// is only true if SIGPROF is already set.
	want := have
	qt.Assert(t, sigsetAdd(&want, unix.SIGPROF), qt.IsNil)
	qt.Assert(t, have, qt.Equals, want)
	unmaskProfilerSignal()
	if err := unix.PthreadSigmask(0, nil, &have); err != nil {
		t.Fatal("get sigmask:", err)
	}
	// After unmasking, the mask must equal the one recorded at the start.
	qt.Assert(t, have, qt.Equals, old)
}
@@ -0,0 +1,178 @@
package sys
import (
"runtime"
"syscall"
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
// ENOTSUPP is a Linux internal error code that has leaked into UAPI.
//
// It is not the same as ENOTSUP or EOPNOTSUPP. Its numeric value is 524.
var ENOTSUPP = syscall.Errno(524)
// BPF wraps SYS_BPF.
//
// Any pointers contained in attr must use the Pointer type from this package.
//
// cmd, attr and size are passed to the syscall as its three arguments. The
// first return value is the raw result of the syscall. The error is non-nil
// when the syscall reported a non-zero errno, which is returned wrapped in a
// wrappedErrno.
func BPF(cmd Cmd, attr unsafe.Pointer, size uintptr) (uintptr, error) {
	// Prevent the Go profiler from repeatedly interrupting the verifier,
	// which could otherwise lead to a livelock due to receiving EAGAIN.
	if cmd == BPF_PROG_LOAD || cmd == BPF_PROG_RUN {
		maskProfilerSignal()
		defer unmaskProfilerSignal()
	}
	for {
		// NOTE(review): the uintptr(attr) conversion is presumably kept inside
		// the call expression on purpose (unsafe.Pointer conversion rules for
		// syscalls) - confirm before restructuring.
		r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size)
		runtime.KeepAlive(attr)
		// As of ~4.20 the verifier can be interrupted by a signal,
		// and returns EAGAIN in that case.
		// Only BPF_PROG_LOAD is retried; every other command returns EAGAIN to
		// the caller.
		if errNo == unix.EAGAIN && cmd == BPF_PROG_LOAD {
			continue
		}
		// Leave err as a literal nil on success.
		var err error
		if errNo != 0 {
			err = wrappedErrno{errNo}
		}
		return r1, err
	}
}
// Info is implemented by all structs that can be passed to the ObjInfo syscall.
//
//	MapInfo
//	ProgInfo
//	LinkInfo
//	BtfInfo
type Info interface {
	// info returns a pointer to the struct together with its size in bytes.
	info() (unsafe.Pointer, uint32)
}
// Compile-time check that *MapInfo satisfies Info.
var _ Info = (*MapInfo)(nil)

// info returns a pointer to i and the size of a MapInfo in bytes.
func (i *MapInfo) info() (unsafe.Pointer, uint32) {
	size := unsafe.Sizeof(*i)
	return unsafe.Pointer(i), uint32(size)
}
// Compile-time check that *ProgInfo satisfies Info.
var _ Info = (*ProgInfo)(nil)

// info returns a pointer to i and the size of a ProgInfo in bytes.
func (i *ProgInfo) info() (unsafe.Pointer, uint32) {
	size := unsafe.Sizeof(*i)
	return unsafe.Pointer(i), uint32(size)
}
// Compile-time check that *LinkInfo satisfies Info.
var _ Info = (*LinkInfo)(nil)

// info returns a pointer to i and the size of a LinkInfo in bytes.
func (i *LinkInfo) info() (unsafe.Pointer, uint32) {
	size := unsafe.Sizeof(*i)
	return unsafe.Pointer(i), uint32(size)
}
// Compile-time check that *BtfInfo satisfies Info.
var _ Info = (*BtfInfo)(nil)

// info returns a pointer to i and the size of a BtfInfo in bytes.
func (i *BtfInfo) info() (unsafe.Pointer, uint32) {
	size := unsafe.Sizeof(*i)
	return unsafe.Pointer(i), uint32(size)
}
// ObjInfo retrieves information about a BPF Fd and stores it in info.
//
// info may be one of MapInfo, ProgInfo, LinkInfo and BtfInfo.
func ObjInfo(fd *FD, info Info) error {
	ptr, size := info.info()

	attr := ObjGetInfoByFdAttr{
		BpfFd:   fd.Uint(),
		InfoLen: size,
		Info:    NewPointer(ptr),
	}
	err := ObjGetInfoByFd(&attr)

	// Only the fd number was handed to the call above, so keep fd itself
	// reachable until the call has returned.
	runtime.KeepAlive(fd)
	return err
}
// ObjName is a null-terminated string made up of
// 'A-Za-z0-9_' characters, stored in a fixed-size array of
// unix.BPF_OBJ_NAME_LEN bytes.
type ObjName [unix.BPF_OBJ_NAME_LEN]byte
// NewObjName converts name into an ObjName, truncating it if it is too long
// to fit.
func NewObjName(name string) ObjName {
	var out ObjName

	// Copy into everything but the final byte, which stays zero so the result
	// is always null terminated.
	copy(out[:len(out)-1], name)
	return out
}
// LogLevel controls the verbosity of the kernel's eBPF program verifier.
type LogLevel uint32

// Values for LogLevel. Each constant is a separate bit (1 << iota), so the
// values are 1, 2 and 4.
const (
	BPF_LOG_LEVEL1 LogLevel = 1 << iota
	BPF_LOG_LEVEL2
	BPF_LOG_STATS
)
// LinkID uniquely identifies a bpf_link.
type LinkID uint32

// BTFID uniquely identifies a BTF blob loaded into the kernel.
type BTFID uint32

// TypeID identifies a type in a BTF blob.
type TypeID uint32

// MapFlags control map behaviour. The individual flags are the BPF_F_*
// constants, each of which is a distinct bit.
type MapFlags uint32
// Values for MapFlags. Each constant is a separate bit (1 << iota), running
// from 1 (BPF_F_NO_PREALLOC) to 4096 (BPF_F_INNER_MAP). The String method for
// MapFlags is generated from this list by stringer.
//
//go:generate stringer -type MapFlags
const (
	BPF_F_NO_PREALLOC MapFlags = 1 << iota
	BPF_F_NO_COMMON_LRU
	BPF_F_NUMA_NODE
	BPF_F_RDONLY
	BPF_F_WRONLY
	BPF_F_STACK_BUILD_ID
	BPF_F_ZERO_SEED
	BPF_F_RDONLY_PROG
	BPF_F_WRONLY_PROG
	BPF_F_CLONE
	BPF_F_MMAPABLE
	BPF_F_PRESERVE_ELEMS
	BPF_F_INNER_MAP
)
// wrappedErrno wraps syscall.Errno to prevent direct comparisons with
// syscall.E* or unix.E* constants.
//
// The wrapped errno is still reachable through Unwrap, so errors.Is keeps
// working.
//
// You should never export an error of this type.
type wrappedErrno struct {
	syscall.Errno
}
// Unwrap returns the wrapped syscall.Errno, which lets errors.Is and
// errors.As see through wrappedErrno.
func (we wrappedErrno) Unwrap() error {
	return we.Errno
}
// Error returns the message of the wrapped errno. ENOTSUPP is special-cased
// and always reported as "operation not supported".
func (we wrappedErrno) Error() string {
	if we.Errno != ENOTSUPP {
		return we.Errno.Error()
	}
	return "operation not supported"
}
// syscallError pairs an error with the errno that caused it. The embedded
// error provides the message, while errno is what the value unwraps to.
type syscallError struct {
	error
	errno syscall.Errno
}

// Error returns an error that carries the message of err, is considered equal
// to err by errors.Is, and unwraps to errno.
func Error(err error, errno syscall.Errno) error {
	return &syscallError{error: err, errno: errno}
}

// Is reports whether target is the very error this syscallError was created
// from. The comparison is a plain equality check; target is not unwrapped.
func (se *syscallError) Is(target error) bool {
	return se.error == target
}

// Unwrap returns the errno, so errors.Is also matches on it.
func (se *syscallError) Unwrap() error {
	return se.errno
}
@@ -0,0 +1,61 @@
package sys
import (
"errors"
"testing"
"github.com/cilium/ebpf/internal/unix"
qt "github.com/frankban/quicktest"
)
// TestObjName checks that NewObjName truncates an over-long name, keeps the
// final byte zero and produces a value of unix.BPF_OBJ_NAME_LEN bytes.
func TestObjName(t *testing.T) {
	name := NewObjName("more_than_16_characters_long")

	// The last byte must remain the null terminator even after truncation.
	if name[len(name)-1] != 0 {
		t.Error("NewObjName doesn't null terminate")
	}

	if len(name) != unix.BPF_OBJ_NAME_LEN {
		t.Errorf("Name is %d instead of %d bytes long", len(name), unix.BPF_OBJ_NAME_LEN)
	}
}
// TestWrappedErrno checks that a wrappedErrno is not directly equal to the
// errno it wraps, that errors.Is still matches only that errno, and that
// ENOTSUPP gets its dedicated message.
func TestWrappedErrno(t *testing.T) {
	wrapped := error(wrappedErrno{unix.EINVAL})
	plain := error(unix.EINVAL)

	// Direct comparison must fail, that is the point of the wrapper.
	if wrapped == plain {
		t.Error("wrappedErrno is comparable to plain errno")
	}

	if !errors.Is(wrapped, plain) {
		t.Error("errors.Is(wrappedErrno, errno) returns false")
	}

	if errors.Is(wrapped, unix.EAGAIN) {
		t.Error("errors.Is(wrappedErrno, EAGAIN) returns true")
	}

	msg := wrappedErrno{ENOTSUPP}.Error()
	qt.Assert(t, msg, qt.Contains, "operation not supported")
}
// TestSyscallError checks that the result of Error matches both the errno and
// the original error via errors.Is, and that neither of them matches the
// result in the opposite direction.
func TestSyscallError(t *testing.T) {
	inner := errors.New("foo")
	wrapped := Error(inner, unix.EINVAL)

	// The combined error matches both of its parts.
	if !errors.Is(wrapped, unix.EINVAL) {
		t.Error("SyscallError is not the wrapped errno")
	}
	if !errors.Is(wrapped, inner) {
		t.Error("SyscallError is not the wrapped error")
	}

	// The relation must not hold the other way around.
	if errors.Is(unix.EINVAL, wrapped) {
		t.Error("Errno is the SyscallError")
	}
	if errors.Is(inner, wrapped) {
		t.Error("Error is the SyscallError")
	}
}
File diff suppressed because it is too large Load Diff