whatcanGOwrong

2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1,8 @@
+package internal
+
+import "golang.org/x/exp/constraints"
+
+// Align rounds a non-negative n up to the next multiple of alignment.
+//
+// Values which already are a multiple of alignment are returned unchanged.
+// alignment must not be zero, since it is used as a divisor.
+func Align[I constraints.Integer](n, alignment I) I {
+	// Number of alignment sized units needed to cover n.
+	units := (n + alignment - 1) / alignment
+	return units * alignment
+}
@@ -0,0 +1,31 @@
+package internal
+
+import (
+	"bytes"
+	"sync"
+)
+
+// bytesBufferPool recycles the [bytes.Buffer] values handed out by NewBuffer
+// and given back through PutBuffer.
+var bytesBufferPool = sync.Pool{
+	New: func() any { return &bytes.Buffer{} },
+}
+
+// NewBuffer takes a [bytes.Buffer] out of the pool and re-initialises it so
+// that it is backed by buf.
+//
+// Hand the returned buffer to [PutBuffer] once it is no longer needed.
+func NewBuffer(buf []byte) *bytes.Buffer {
+	pooled := bytesBufferPool.Get().(*bytes.Buffer)
+	// Only the Buffer struct itself is recycled. Its contents are replaced by
+	// a fresh Buffer on top of buf, because callers get hold of the backing
+	// slice through Bytes(). Reusing the pooled allocation is still faster
+	// than allocating a new Buffer for every call.
+	fresh := bytes.NewBuffer(buf)
+	*pooled = *fresh
+	return pooled
+}
+
+// PutBuffer hands buf back to the pool.
+func PutBuffer(buf *bytes.Buffer) {
+	// Overwrite buf with an empty Buffer first, so that the pool doesn't hold
+	// on to the backing slice.
+	empty := bytes.NewBuffer(nil)
+	*buf = *empty
+	bytesBufferPool.Put(buf)
+}
@@ -0,0 +1 @@
+gentypes
@@ -0,0 +1,776 @@
+// Program gentypes reads a compressed vmlinux .BTF section and generates
+// syscall bindings from it.
+//
+// Output is written to "types.go".
+package main
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"os"
+	"sort"
+	"strings"
+
+	"github.com/cilium/ebpf/btf"
+	"github.com/cilium/ebpf/internal"
+	"github.com/cilium/ebpf/internal/sys"
+)
+
+// syscallRetval selects which kind of wrapper function generateTypes emits
+// for a bpf_attr command.
+type syscallRetval int
+
+const (
+	// retError makes the generated wrapper return only an error.
+	retError syscallRetval = iota
+	// retFd makes the generated wrapper return (*FD, error).
+	retFd
+)
+
+// main runs the generator on the command line arguments. A failure is
+// reported on stderr and turns into exit status 1.
+func main() {
+	err := run(os.Args[1:])
+	if err == nil {
+		return
+	}
+
+	fmt.Fprintln(os.Stderr, "Error:", err)
+	os.Exit(1)
+}
+
+// run reads the compressed vmlinux .BTF blob named by the single element of
+// args, generates the bindings from it and writes them to "types.go" in the
+// current working directory.
+//
+// It returns an error if args doesn't contain exactly one element, or if
+// reading, parsing, generating or writing fails.
+func run(args []string) (err error) {
+	if len(args) != 1 {
+		return errors.New("expect location of compressed vmlinux .BTF as argument")
+	}
+
+	raw, err := internal.ReadAllCompressed(args[0])
+	if err != nil {
+		return err
+	}
+
+	spec, err := btf.LoadSpecFromReader(bytes.NewReader(raw))
+	if err != nil {
+		return err
+	}
+
+	output, err := generateTypes(spec)
+	if err != nil {
+		return err
+	}
+
+	w, err := os.Create("types.go")
+	if err != nil {
+		return err
+	}
+	defer func() {
+		// A failing Close may mean that the output wasn't written completely.
+		// Report it unless an earlier error already explains the failure.
+		if cerr := w.Close(); cerr != nil && err == nil {
+			err = cerr
+		}
+	}()
+
+	return internal.WriteFormatted(output, w)
+}
+
+// generateTypes renders the source of the generated "sys" package file from
+// the kernel types in spec.
+//
+// In order it emits: the enums listed in enums, the structs listed in
+// structs, one "<goType>Attr" struct plus a wrapper function per entry in
+// attrs (taken from the members of union bpf_attr), and the type specific
+// parts of bpf_link_info. Each list is sorted by Go type name before it is
+// emitted.
+//
+// It returns an error if a type can't be found in spec, if a patch can't be
+// applied, or if a declaration can't be generated.
+func generateTypes(spec *btf.Spec) ([]byte, error) {
+	// Placeholder types. Patches swap them in for struct members, and
+	// gf.Names maps each of them to the name of an existing Go type.
+	objName := &btf.Array{Nelems: 16, Type: &btf.Int{Encoding: btf.Char, Size: 1}}
+	linkID := &btf.Int{Size: 4}
+	btfID := &btf.Int{Size: 4}
+	typeID := &btf.Int{Size: 4}
+	pointer := &btf.Int{Size: 8}
+	logLevel := &btf.Int{Size: 4}
+	mapFlags := &btf.Int{Size: 4}
+
+	gf := &btf.GoFormatter{
+		Names: map[btf.Type]string{
+			objName:  internal.GoTypeName(sys.ObjName{}),
+			linkID:   internal.GoTypeName(sys.LinkID(0)),
+			btfID:    internal.GoTypeName(sys.BTFID(0)),
+			typeID:   internal.GoTypeName(sys.TypeID(0)),
+			pointer:  internal.GoTypeName(sys.Pointer{}),
+			logLevel: internal.GoTypeName(sys.LogLevel(0)),
+			mapFlags: internal.GoTypeName(sys.MapFlags(0)),
+		},
+		Identifier: internal.Identifier,
+		// Enum elements are passed through unchanged, without using the enum
+		// name.
+		EnumIdentifier: func(name, element string) string {
+			return element
+		},
+	}
+
+	// File header of the generated source.
+	w := bytes.NewBuffer(nil)
+	w.WriteString(`// Code generated by internal/cmd/gentypes; DO NOT EDIT.
+
+package sys
+
+import (
+	"unsafe"
+)
+
+`)
+
+	// Enums: Go type name and the C enum it is generated from.
+	enums := []struct {
+		goType string
+		cType  string
+	}{
+		{"Cmd", "bpf_cmd"},
+		{"MapType", "bpf_map_type"},
+		{"ProgType", "bpf_prog_type"},
+		{"AttachType", "bpf_attach_type"},
+		{"LinkType", "bpf_link_type"},
+		{"StatsType", "bpf_stats_type"},
+		{"SkAction", "sk_action"},
+		{"StackBuildIdStatus", "bpf_stack_build_id_status"},
+		{"FunctionId", "bpf_func_id"},
+		{"AdjRoomMode", "bpf_adj_room_mode"},
+		{"HdrStartOff", "bpf_hdr_start_off"},
+		{"RetCode", "bpf_ret_code"},
+		{"XdpAction", "xdp_action"},
+	}
+
+	sort.Slice(enums, func(i, j int) bool {
+		return enums[i].goType < enums[j].goType
+	})
+
+	// enumTypes lets the patches below refer to an enum by its Go name.
+	enumTypes := make(map[string]btf.Type)
+	for _, o := range enums {
+		// Progress output on stdout.
+		fmt.Println("enum", o.goType)
+
+		var t *btf.Enum
+		if err := spec.TypeByName(o.cType, &t); err != nil {
+			return nil, err
+		}
+
+		// Add the enum as a predeclared type so that generated structs
+		// refer to the Go types.
+		if name := gf.Names[t]; name != "" {
+			return nil, fmt.Errorf("type %q is already declared as %s", o.cType, name)
+		}
+		gf.Names[t] = o.goType
+		enumTypes[o.goType] = t
+
+		decl, err := gf.TypeDeclaration(o.goType, t)
+		if err != nil {
+			return nil, fmt.Errorf("generate %q: %w", o.goType, err)
+		}
+
+		w.WriteString(decl)
+		w.WriteRune('\n')
+	}
+
+	// Assorted structs
+
+	structs := []struct {
+		goType  string
+		cType   string
+		patches []patch
+	}{
+		{
+			"ProgInfo", "bpf_prog_info",
+			[]patch{
+				replace(objName, "name"),
+				replace(pointer, "xlated_prog_insns"),
+				replace(pointer, "map_ids"),
+				replace(btfID, "btf_id"),
+			},
+		},
+		{
+			"MapInfo", "bpf_map_info",
+			[]patch{
+				replace(objName, "name"),
+				replace(mapFlags, "map_flags"),
+				replace(typeID, "btf_vmlinux_value_type_id", "btf_key_type_id", "btf_value_type_id"),
+			},
+		},
+		{
+			"BtfInfo", "bpf_btf_info",
+			[]patch{
+				replace(pointer, "btf", "name"),
+				replace(btfID, "id"),
+			},
+		},
+		{
+			"LinkInfo", "bpf_link_info",
+			[]patch{
+				replace(enumTypes["LinkType"], "type"),
+				replace(linkID, "id"),
+				name(3, "extra"),
+				replaceWithBytes("extra"),
+			},
+		},
+		{"FuncInfo", "bpf_func_info", nil},
+		{"LineInfo", "bpf_line_info", nil},
+		{"XdpMd", "xdp_md", nil},
+		{
+			"SkLookup", "bpf_sk_lookup",
+			[]patch{
+				choose(0, "cookie"),
+				replaceWithBytes("remote_ip4", "remote_ip6", "local_ip4", "local_ip6"),
+			},
+		},
+	}
+
+	sort.Slice(structs, func(i, j int) bool {
+		return structs[i].goType < structs[j].goType
+	})
+
+	for _, s := range structs {
+		fmt.Println("struct", s.goType)
+
+		var t *btf.Struct
+		if err := spec.TypeByName(s.cType, &t); err != nil {
+			return nil, err
+		}
+
+		if err := outputPatchedStruct(gf, w, s.goType, t, s.patches); err != nil {
+			return nil, fmt.Errorf("output %q: %w", s.goType, err)
+		}
+	}
+
+	// Attrs
+
+	// Each entry becomes a "<goType>Attr" struct and a function goType which
+	// passes cmd to BPF(). cType is a key of the attrTypes map built below.
+	attrs := []struct {
+		goType  string
+		ret     syscallRetval
+		cType   string
+		cmd     string
+		patches []patch
+	}{
+		{
+			"MapCreate", retFd, "map_create", "BPF_MAP_CREATE",
+			[]patch{
+				replace(objName, "map_name"),
+				replace(enumTypes["MapType"], "map_type"),
+				replace(mapFlags, "map_flags"),
+				replace(typeID, "btf_vmlinux_value_type_id", "btf_key_type_id", "btf_value_type_id"),
+			},
+		},
+		{
+			"MapLookupElem", retError, "map_elem", "BPF_MAP_LOOKUP_ELEM",
+			[]patch{choose(2, "value"), replace(pointer, "key", "value")},
+		},
+		{
+			"MapLookupAndDeleteElem", retError, "map_elem", "BPF_MAP_LOOKUP_AND_DELETE_ELEM",
+			[]patch{choose(2, "value"), replace(pointer, "key", "value")},
+		},
+		{
+			"MapUpdateElem", retError, "map_elem", "BPF_MAP_UPDATE_ELEM",
+			[]patch{choose(2, "value"), replace(pointer, "key", "value")},
+		},
+		{
+			"MapDeleteElem", retError, "map_elem", "BPF_MAP_DELETE_ELEM",
+			[]patch{choose(2, "value"), replace(pointer, "key", "value")},
+		},
+		{
+			"MapGetNextKey", retError, "map_elem", "BPF_MAP_GET_NEXT_KEY",
+			[]patch{
+				choose(2, "next_key"), replace(pointer, "key", "next_key"),
+				truncateAfter("next_key"),
+			},
+		},
+		{
+			"MapFreeze", retError, "map_elem", "BPF_MAP_FREEZE",
+			[]patch{truncateAfter("map_fd")},
+		},
+		{
+			"MapLookupBatch", retError, "map_elem_batch", "BPF_MAP_LOOKUP_BATCH",
+			[]patch{replace(pointer, "in_batch", "out_batch", "keys", "values")},
+		},
+		{
+			"MapLookupAndDeleteBatch", retError, "map_elem_batch", "BPF_MAP_LOOKUP_AND_DELETE_BATCH",
+			[]patch{replace(pointer, "in_batch", "out_batch", "keys", "values")},
+		},
+		{
+			"MapUpdateBatch", retError, "map_elem_batch", "BPF_MAP_UPDATE_BATCH",
+			[]patch{replace(pointer, "in_batch", "out_batch", "keys", "values")},
+		},
+		{
+			"MapDeleteBatch", retError, "map_elem_batch", "BPF_MAP_DELETE_BATCH",
+			[]patch{replace(pointer, "in_batch", "out_batch", "keys", "values")},
+		},
+		{
+			"ProgLoad", retFd, "prog_load", "BPF_PROG_LOAD",
+			[]patch{
+				replace(objName, "prog_name"),
+				replace(enumTypes["ProgType"], "prog_type"),
+				replace(enumTypes["AttachType"], "expected_attach_type"),
+				replace(logLevel, "log_level"),
+				replace(pointer,
+					"insns",
+					"license",
+					"log_buf",
+					"func_info",
+					"line_info",
+					"fd_array",
+					"core_relos",
+				),
+				replace(typeID, "attach_btf_id"),
+				choose(20, "attach_btf_obj_fd"),
+			},
+		},
+		{
+			"ProgBindMap", retError, "prog_bind_map", "BPF_PROG_BIND_MAP",
+			nil,
+		},
+		{
+			"ObjPin", retError, "obj_pin", "BPF_OBJ_PIN",
+			[]patch{replace(pointer, "pathname")},
+		},
+		{
+			"ObjGet", retFd, "obj_pin", "BPF_OBJ_GET",
+			[]patch{replace(pointer, "pathname")},
+		},
+		{
+			"ProgAttach", retError, "prog_attach", "BPF_PROG_ATTACH",
+			nil,
+		},
+		{
+			"ProgDetach", retError, "prog_attach", "BPF_PROG_DETACH",
+			[]patch{truncateAfter("attach_type")},
+		},
+		{
+			"ProgRun", retError, "prog_run", "BPF_PROG_TEST_RUN",
+			[]patch{replace(pointer, "data_in", "data_out", "ctx_in", "ctx_out")},
+		},
+		{
+			"ProgGetNextId", retError, "obj_next_id", "BPF_PROG_GET_NEXT_ID",
+			[]patch{
+				choose(0, "start_id"), rename("start_id", "id"),
+				truncateAfter("next_id"),
+			},
+		},
+		{
+			"MapGetNextId", retError, "obj_next_id", "BPF_MAP_GET_NEXT_ID",
+			[]patch{
+				choose(0, "start_id"), rename("start_id", "id"),
+				truncateAfter("next_id"),
+			},
+		},
+		{
+			"BtfGetNextId", retError, "obj_next_id", "BPF_BTF_GET_NEXT_ID",
+			[]patch{
+				choose(0, "start_id"), rename("start_id", "id"),
+				replace(btfID, "id", "next_id"),
+				truncateAfter("next_id"),
+			},
+		},
+		// These piggy back on the obj_next_id decl, but only support the
+		// first field...
+		{
+			"BtfGetFdById", retFd, "obj_next_id", "BPF_BTF_GET_FD_BY_ID",
+			[]patch{choose(0, "start_id"), rename("start_id", "id"), truncateAfter("id")},
+		},
+		{
+			"MapGetFdById", retFd, "obj_next_id", "BPF_MAP_GET_FD_BY_ID",
+			[]patch{choose(0, "start_id"), rename("start_id", "id"), truncateAfter("id")},
+		},
+		{
+			"ProgGetFdById", retFd, "obj_next_id", "BPF_PROG_GET_FD_BY_ID",
+			[]patch{choose(0, "start_id"), rename("start_id", "id"), truncateAfter("id")},
+		},
+		{
+			"ObjGetInfoByFd", retError, "info_by_fd", "BPF_OBJ_GET_INFO_BY_FD",
+			[]patch{replace(pointer, "info")},
+		},
+		{
+			"RawTracepointOpen", retFd, "raw_tracepoint_open", "BPF_RAW_TRACEPOINT_OPEN",
+			[]patch{replace(pointer, "name")},
+		},
+		{
+			"BtfLoad", retFd, "btf_load", "BPF_BTF_LOAD",
+			[]patch{replace(pointer, "btf", "btf_log_buf")},
+		},
+		{
+			"LinkCreate", retFd, "link_create", "BPF_LINK_CREATE",
+			[]patch{
+				replace(enumTypes["AttachType"], "attach_type"),
+				choose(4, "target_btf_id"),
+				replace(typeID, "target_btf_id"),
+			},
+		},
+		{
+			"LinkCreateIter", retFd, "link_create", "BPF_LINK_CREATE",
+			[]patch{
+				chooseNth(4, 1),
+				replace(enumTypes["AttachType"], "attach_type"),
+				flattenAnon,
+				replace(pointer, "iter_info"),
+			},
+		},
+		{
+			"LinkCreatePerfEvent", retFd, "link_create", "BPF_LINK_CREATE",
+			[]patch{
+				chooseNth(4, 2),
+				replace(enumTypes["AttachType"], "attach_type"),
+				flattenAnon,
+			},
+		},
+		{
+			"LinkCreateKprobeMulti", retFd, "link_create", "BPF_LINK_CREATE",
+			[]patch{
+				chooseNth(4, 3),
+				replace(enumTypes["AttachType"], "attach_type"),
+				// Rename the nested "flags" member before the struct is
+				// flattened into its parent.
+				modify(func(m *btf.Member) error {
+					return rename("flags", "kprobe_multi_flags")(m.Type.(*btf.Struct))
+				}, "kprobe_multi"),
+				flattenAnon,
+				replace(pointer, "cookies"),
+				replace(pointer, "addrs"),
+				replace(pointer, "syms"),
+				rename("cnt", "count"),
+			},
+		},
+		{
+			"LinkCreateTracing", retFd, "link_create", "BPF_LINK_CREATE",
+			[]patch{
+				chooseNth(4, 4),
+				replace(enumTypes["AttachType"], "attach_type"),
+				flattenAnon,
+				replace(btfID, "target_btf_id"),
+			},
+		},
+		{
+			"LinkUpdate", retError, "link_update", "BPF_LINK_UPDATE",
+			nil,
+		},
+		{
+			"EnableStats", retFd, "enable_stats", "BPF_ENABLE_STATS",
+			nil,
+		},
+		{
+			"IterCreate", retFd, "iter_create", "BPF_ITER_CREATE",
+			nil,
+		},
+		{
+			"ProgQuery", retError, "prog_query", "BPF_PROG_QUERY",
+			[]patch{
+				replace(enumTypes["AttachType"], "attach_type"),
+				replace(pointer, "prog_ids"),
+				rename("prog_cnt", "prog_count"),
+			},
+		},
+	}
+
+	sort.Slice(attrs, func(i, j int) bool {
+		return attrs[i].goType < attrs[j].goType
+	})
+
+	var bpfAttr *btf.Union
+	if err := spec.TypeByName("bpf_attr", &bpfAttr); err != nil {
+		return nil, err
+	}
+	// The entries have to be in the same order as the members of bpf_attr,
+	// since splitUnion matches them up by position.
+	attrTypes, err := splitUnion(bpfAttr, types{
+		{"map_create", "map_type"},
+		{"map_elem", "map_fd"},
+		{"map_elem_batch", "batch"},
+		{"prog_load", "prog_type"},
+		{"obj_pin", "pathname"},
+		{"prog_attach", "target_fd"},
+		{"prog_run", "test"},
+		{"obj_next_id", ""},
+		{"info_by_fd", "info"},
+		{"prog_query", "query"},
+		{"raw_tracepoint_open", "raw_tracepoint"},
+		{"btf_load", "btf"},
+		{"task_fd_query", "task_fd_query"},
+		{"link_create", "link_create"},
+		{"link_update", "link_update"},
+		{"link_detach", "link_detach"},
+		{"enable_stats", "enable_stats"},
+		{"iter_create", "iter_create"},
+		{"prog_bind_map", "prog_bind_map"},
+	})
+	if err != nil {
+		return nil, fmt.Errorf("splitting bpf_attr: %w", err)
+	}
+
+	for _, s := range attrs {
+		fmt.Println("attr", s.goType)
+
+		t := attrTypes[s.cType]
+		if t == nil {
+			return nil, fmt.Errorf("unknown attr %q", s.cType)
+		}
+
+		goAttrType := s.goType + "Attr"
+		if err := outputPatchedStruct(gf, w, goAttrType, t, s.patches); err != nil {
+			return nil, fmt.Errorf("output %q: %w", goAttrType, err)
+		}
+
+		// Emit the wrapper function for the command.
+		switch s.ret {
+		case retError:
+			fmt.Fprintf(w, "func %s(attr *%s) error { _, err := BPF(%s, unsafe.Pointer(attr), unsafe.Sizeof(*attr)); return err }\n\n", s.goType, goAttrType, s.cmd)
+		case retFd:
+			fmt.Fprintf(w, "func %s(attr *%s) (*FD, error) { fd, err := BPF(%s, unsafe.Pointer(attr), unsafe.Sizeof(*attr)); if err != nil { return nil, err }; return NewFD(int(fd)) }\n\n", s.goType, goAttrType, s.cmd)
+		}
+	}
+
+	// Link info type specific
+
+	linkInfoExtraTypes := []struct {
+		goType  string
+		cType   string
+		patches []patch
+	}{
+		{"CgroupLinkInfo", "cgroup", []patch{replace(enumTypes["AttachType"], "attach_type")}},
+		{"IterLinkInfo", "iter", []patch{replace(pointer, "target_name"), truncateAfter("target_name_len")}},
+		{"NetNsLinkInfo", "netns", []patch{replace(enumTypes["AttachType"], "attach_type")}},
+		{"RawTracepointLinkInfo", "raw_tracepoint", []patch{replace(pointer, "tp_name")}},
+		{"TracingLinkInfo", "tracing", []patch{
+			replace(enumTypes["AttachType"], "attach_type"),
+			replace(typeID, "target_btf_id")},
+		},
+		{"XDPLinkInfo", "xdp", nil},
+	}
+
+	sort.Slice(linkInfoExtraTypes, func(i, j int) bool {
+		return linkInfoExtraTypes[i].goType < linkInfoExtraTypes[j].goType
+	})
+
+	var bpfLinkInfo *btf.Struct
+	if err := spec.TypeByName("bpf_link_info", &bpfLinkInfo); err != nil {
+		return nil, err
+	}
+
+	// The type specific union is expected to be the last member of
+	// bpf_link_info.
+	member := bpfLinkInfo.Members[len(bpfLinkInfo.Members)-1]
+	bpfLinkInfoUnion, ok := member.Type.(*btf.Union)
+	if !ok {
+		return nil, fmt.Errorf("there is not type-specific union")
+	}
+
+	linkInfoTypes, err := splitUnion(bpfLinkInfoUnion, types{
+		{"raw_tracepoint", "raw_tracepoint"},
+		{"tracing", "tracing"},
+		{"cgroup", "cgroup"},
+		{"iter", "iter"},
+		{"netns", "netns"},
+		{"xdp", "xdp"},
+	})
+	if err != nil {
+		return nil, fmt.Errorf("splitting linkInfo: %w", err)
+	}
+
+	for _, s := range linkInfoExtraTypes {
+		// NOTE(review): unlike the attrs loop above, a missing key isn't
+		// checked for here, so t may be nil.
+		t := linkInfoTypes[s.cType]
+		if err := outputPatchedStruct(gf, w, s.goType, t, s.patches); err != nil {
+			return nil, fmt.Errorf("output %q: %w", s.goType, err)
+		}
+	}
+
+	return w.Bytes(), nil
+}
+
+// outputPatchedStruct writes the Go declaration of s to w under the name id,
+// followed by a blank line.
+//
+// The patches are applied in order to the result of btf.Copy(s, nil) rather
+// than to s itself. The index of a failing patch is part of the error.
+func outputPatchedStruct(gf *btf.GoFormatter, w *bytes.Buffer, id string, s *btf.Struct, patches []patch) error {
+	patched := btf.Copy(s, nil).(*btf.Struct)
+
+	for idx, apply := range patches {
+		err := apply(patched)
+		if err != nil {
+			return fmt.Errorf("patch %d: %w", idx, err)
+		}
+	}
+
+	decl, err := gf.TypeDeclaration(id, patched)
+	if err != nil {
+		return err
+	}
+
+	w.WriteString(decl + "\n\n")
+	return nil
+}
+
+// types describes the members of a union for splitUnion, one entry per
+// member and in member order.
+//
+// name is the key under which the member's struct is stored in the result.
+// cFieldOrFirstMember is the expected C name of the member or, if the member
+// is anonymous, the expected name of the first field of its struct.
+type types []struct {
+	name                string
+	cFieldOrFirstMember string
+}
+
+// splitUnion returns the structs making up the leading members of union,
+// keyed by the names given in types.
+//
+// The i-th entry of types describes the i-th member of union. The member has
+// to be a struct. Its name has to equal cFieldOrFirstMember or, if the member
+// is anonymous, the name of the first field of its struct has to. Any
+// mismatch is reported as an error, as is a union with fewer members than
+// types has entries.
+func splitUnion(union *btf.Union, types types) (map[string]*btf.Struct, error) {
+	// Members are matched by position, so make sure there are enough of them
+	// instead of panicking on the index expression below.
+	if len(types) > len(union.Members) {
+		return nil, fmt.Errorf("expected at least %d union members, got %d", len(types), len(union.Members))
+	}
+
+	structs := make(map[string]*btf.Struct, len(types))
+
+	for i, t := range types {
+		member := union.Members[i]
+		s, ok := member.Type.(*btf.Struct)
+		if !ok {
+			return nil, fmt.Errorf("%q: %s is not a struct", t.name, member.Type)
+		}
+
+		if member.Name == "" {
+			// This is an anonymous struct, check the name of the first member instead.
+			if len(s.Members) == 0 {
+				return nil, fmt.Errorf("%q: anonymous struct has no members", t.name)
+			}
+			if name := s.Members[0].Name; name != t.cFieldOrFirstMember {
+				return nil, fmt.Errorf("first field of %q is %q, not %q", t.name, name, t.cFieldOrFirstMember)
+			}
+		} else if member.Name != t.cFieldOrFirstMember {
+			return nil, fmt.Errorf("name for %q is %q, not %q", t.name, member.Name, t.cFieldOrFirstMember)
+		}
+
+		structs[t.name] = s
+	}
+
+	return structs, nil
+}
+
+// patch modifies a struct in place. It returns an error if the modification
+// can't be applied.
+type patch func(*btf.Struct) error
+
+// modify builds a patch which calls fn on the struct members named in
+// members.
+//
+// Only the first member carrying a given name is passed to fn. The patch
+// fails with the error returned by fn, or with a sorted list of the
+// requested names which the struct doesn't contain.
+func modify(fn func(*btf.Member) error, members ...string) patch {
+	return func(s *btf.Struct) error {
+		pending := make(map[string]bool)
+		for _, member := range members {
+			pending[member] = true
+		}
+
+		for idx, current := range s.Members {
+			if !pending[current.Name] {
+				continue
+			}
+
+			if err := fn(&s.Members[idx]); err != nil {
+				return err
+			}
+			// Use the name from before fn ran, fn is free to rename.
+			delete(pending, current.Name)
+		}
+
+		if len(pending) > 0 {
+			missing := make([]string, 0, len(pending))
+			for member := range pending {
+				missing = append(missing, member)
+			}
+			sort.Strings(missing)
+
+			return fmt.Errorf("missing members: %v", strings.Join(missing, ", "))
+		}
+
+		return nil
+	}
+}
+
+// modifyNth builds a patch which calls fn on the struct members at the given
+// indices, in the order in which the indices are listed.
+//
+// The patch fails if an index is negative or past the last member, or if fn
+// returns an error.
+func modifyNth(fn func(*btf.Member) error, indices ...int) patch {
+	return func(s *btf.Struct) error {
+		for _, i := range indices {
+			// Negative indices are rejected as well, they would otherwise
+			// panic in the index expression below.
+			if i < 0 || i >= len(s.Members) {
+				return fmt.Errorf("index %d is out of bounds", i)
+			}
+
+			if err := fn(&s.Members[i]); err != nil {
+				return fmt.Errorf("member #%d: %w", i, err)
+			}
+		}
+		return nil
+	}
+}
+
+// replace builds a patch which sets the type of each named member to t.
+func replace(t btf.Type, members ...string) patch {
+	setType := func(m *btf.Member) error {
+		m.Type = t
+		return nil
+	}
+	return modify(setType, members...)
+}
+
+// choose builds a patch which collapses the union at index member into one
+// of its alternatives: the struct member takes over the name and the type of
+// the union member called name.
+//
+// The patch fails if the struct member isn't a union or if the union has no
+// member called name.
+func choose(member int, name string) patch {
+	pick := func(m *btf.Member) error {
+		union, isUnion := m.Type.(*btf.Union)
+		if !isUnion {
+			return fmt.Errorf("member %d is %s, not a union", member, m.Type)
+		}
+
+		for i := range union.Members {
+			candidate := union.Members[i]
+			if candidate.Name != name {
+				continue
+			}
+
+			m.Name, m.Type = candidate.Name, candidate.Type
+			return nil
+		}
+
+		return fmt.Errorf("%s has no member %q", union, name)
+	}
+	return modifyNth(pick, member)
+}
+
+// chooseNth builds a patch which collapses the union at index member into
+// its n-th alternative: the struct member takes over the name and the type
+// of the union member at index n.
+//
+// The patch fails if the struct member isn't a union or if n isn't a valid
+// index into the members of the union.
+func chooseNth(member int, n int) patch {
+	return modifyNth(func(m *btf.Member) error {
+		union, ok := m.Type.(*btf.Union)
+		if !ok {
+			return fmt.Errorf("member %d is %s, not a union", member, m.Type)
+		}
+
+		// Negative values are rejected as well, they would otherwise panic in
+		// the index expression below.
+		if n < 0 || n >= len(union.Members) {
+			return fmt.Errorf("member %d is out of bounds", n)
+		}
+
+		um := union.Members[n]
+		m.Name = um.Name
+		m.Type = um.Type
+		return nil
+	}, member)
+}
+
+// flattenAnon replaces members of s whose type is an anonymous struct with
+// the members of that struct.
+//
+// The offset of the replaced member is added to the offsets of the members
+// which take its place. Members whose type isn't a struct, or whose struct
+// type has a name, are left alone. The function always returns nil, the
+// error result only exists to satisfy the patch signature.
+func flattenAnon(s *btf.Struct) error {
+	// The length is re-evaluated on every iteration because flattening
+	// replaces s.Members with a slice of a different length. Ranging over
+	// s.Members would keep using the length from before the loop, which
+	// never visits members pushed past that length and indexes out of range
+	// if the slice shrinks.
+	for i := 0; i < len(s.Members); i++ {
+		m := &s.Members[i]
+
+		cs, ok := m.Type.(*btf.Struct)
+		if !ok || cs.TypeName() != "" {
+			continue
+		}
+
+		// Make the offsets of the nested members relative to s.
+		for j := range cs.Members {
+			cs.Members[j].Offset += m.Offset
+		}
+
+		newMembers := make([]btf.Member, 0, len(s.Members)+len(cs.Members)-1)
+		newMembers = append(newMembers, s.Members[:i]...)
+		newMembers = append(newMembers, cs.Members...)
+		newMembers = append(newMembers, s.Members[i+1:]...)
+
+		s.Members = newMembers
+	}
+
+	return nil
+}
+
+// truncateAfter builds a patch which drops all members following the first
+// member called name, and shrinks the size of the struct so that it ends
+// right after that member.
+//
+// The patch fails if there is no such member or if its size can't be
+// determined.
+func truncateAfter(name string) patch {
+	return func(s *btf.Struct) error {
+		for idx := range s.Members {
+			last := s.Members[idx]
+			if last.Name == name {
+				size, err := btf.Sizeof(last.Type)
+				if err != nil {
+					return err
+				}
+
+				s.Size = last.Offset.Bytes() + uint32(size)
+				s.Members = s.Members[:idx+1]
+				return nil
+			}
+		}
+
+		return fmt.Errorf("no member %q", name)
+	}
+}
+
+// rename builds a patch which changes the name of the first member called
+// from into to. The patch fails if there is no member called from.
+func rename(from, to string) patch {
+	return func(s *btf.Struct) error {
+		for i := range s.Members {
+			member := &s.Members[i]
+			if member.Name != from {
+				continue
+			}
+
+			member.Name = to
+			return nil
+		}
+		return fmt.Errorf("no member named %q", from)
+	}
+}
+
+// name builds a patch which gives the so far unnamed member at index member
+// a name. The patch fails if that member already has a name.
+func name(member int, name string) patch {
+	assign := func(m *btf.Member) error {
+		if existing := m.Name; existing != "" {
+			return fmt.Errorf("member already has name %q", existing)
+		}
+
+		m.Name = name
+		return nil
+	}
+	return modifyNth(assign, member)
+}
+
+// replaceWithBytes builds a patch which turns each named member into an
+// array of one byte integers that has the same size as the member's original
+// type.
+//
+// The patch fails for bitfields and for members whose size can't be
+// determined.
+func replaceWithBytes(members ...string) patch {
+	toBytes := func(m *btf.Member) error {
+		if m.BitfieldSize != 0 {
+			return errors.New("replaceWithBytes: member is a bitfield")
+		}
+
+		size, err := btf.Sizeof(m.Type)
+		if err != nil {
+			return fmt.Errorf("replaceWithBytes: size of %s: %w", m.Type, err)
+		}
+
+		elem := &btf.Int{Size: 1}
+		m.Type = &btf.Array{Type: elem, Nelems: uint32(size)}
+		return nil
+	}
+	return modify(toBytes, members...)
+}
@@ -0,0 +1,51 @@
+package internal
+
+import (
+	"fmt"
+	"os"
+	"strings"
+)
+
+// PossibleCPUs returns the max number of CPUs a system may possibly have.
+//
+// The number is parsed from /sys/devices/system/cpu/possible. Logical CPU
+// numbers must be of the form 0-n. The function is wrapped in Memoize.
+var PossibleCPUs = Memoize(func() (int, error) {
+	return parseCPUsFromFile("/sys/devices/system/cpu/possible")
+})
+
+// parseCPUsFromFile reads the file at path and parses its contents with
+// parseCPUs.
+//
+// Errors from reading the file are returned as is. Parse errors are wrapped
+// together with path, so callers can still inspect them with errors.Is and
+// errors.As.
+func parseCPUsFromFile(path string) (int, error) {
+	spec, err := os.ReadFile(path)
+	if err != nil {
+		return 0, err
+	}
+
+	n, err := parseCPUs(string(spec))
+	if err != nil {
+		return 0, fmt.Errorf("can't parse %s: %w", path, err)
+	}
+
+	return n, nil
+}
+
+// parseCPUs parses the number of cpus from a string produced
+// by bitmap_list_string() in the Linux kernel.
+// Multiple ranges are rejected, since they can't be unified
+// into a single number.
+// This is the format of /sys/devices/system/cpu/possible, it
+// is not suitable for /sys/devices/system/cpu/online, etc.
+//
+// The accepted inputs are "0" and "0-n", optionally followed by a newline.
+// An error is returned for anything else, including ranges which don't start
+// at zero and ranges whose end lies before their start.
+func parseCPUs(spec string) (int, error) {
+	if strings.Trim(spec, "\n") == "0" {
+		return 1, nil
+	}
+
+	var low, high int
+	n, err := fmt.Sscanf(spec, "%d-%d\n", &low, &high)
+	if n != 2 || err != nil {
+		return 0, fmt.Errorf("invalid format: %s", spec)
+	}
+	if low != 0 {
+		return 0, fmt.Errorf("CPU spec doesn't start at zero: %s", spec)
+	}
+	if high < low {
+		// %d accepts a sign, so something like "0--5" scans successfully and
+		// would otherwise yield a negative number of CPUs.
+		return 0, fmt.Errorf("invalid CPU range: %s", spec)
+	}
+
+	// cpus is 0 indexed
+	return high + 1, nil
+}
@@ -0,0 +1,32 @@
+package internal
+
+import (
+	"testing"
+)
+
+// TestParseCPUs checks that parseCPUs accepts "0" and single ranges starting
+// at zero, and that it rejects lists and incomplete input.
+func TestParseCPUs(t *testing.T) {
+	valid := []struct {
+		spec string
+		cpus int
+	}{
+		{"0-1", 2},
+		{"0-2\n", 3},
+		{"0", 1},
+	}
+	for _, tc := range valid {
+		n, err := parseCPUs(tc.spec)
+		switch {
+		case err != nil:
+			t.Errorf("Can't parse `%s`: %v", tc.spec, err)
+		case n != tc.cpus:
+			t.Error("Parsing", tc.spec, "returns", n, "instead of", tc.cpus)
+		}
+	}
+
+	invalid := []string{"0,3-4", "0-", "1,", ""}
+	for _, spec := range invalid {
+		if _, err := parseCPUs(spec); err == nil {
+			t.Error("Parsed invalid format:", spec)
+		}
+	}
+}
@@ -0,0 +1,91 @@
+package internal
+
+import "math/bits"
+
+// Deque implements a double ended queue.
+//
+// Elements are kept in a ring buffer. read and write are cursors which are
+// turned into indices into elems by AND-ing them with mask. The zero value
+// is an empty deque which is ready to use.
+type Deque[T any] struct {
+	elems       []T
+	read, write uint64
+	mask        uint64
+}
+
+// Reset empties the deque. The backing buffer is kept, but all elements
+// stored in it are overwritten with the zero value.
+func (dq *Deque[T]) Reset() {
+	var zero T
+
+	for pos := dq.read; pos < dq.write; pos++ {
+		dq.elems[pos&dq.mask] = zero
+	}
+
+	dq.read = 0
+	dq.write = 0
+}
+
+// Empty reports whether the deque contains no elements.
+func (dq *Deque[T]) Empty() bool {
+	return dq.write == dq.read
+}
+
+// Push appends an element to the end, growing the deque if necessary.
+func (dq *Deque[T]) Push(e T) {
+	dq.Grow(1)
+	slot := dq.write & dq.mask
+	dq.elems[slot] = e
+	dq.write++
+}
+
+// Shift removes the first element and returns it. The zero value is returned
+// if the deque is empty.
+func (dq *Deque[T]) Shift() T {
+	var zero T
+
+	if dq.read == dq.write {
+		return zero
+	}
+
+	slot := &dq.elems[dq.read&dq.mask]
+	dq.read++
+
+	// Clear the slot so that the buffer doesn't hold on to the element.
+	head := *slot
+	*slot = zero
+	return head
+}
+
+// Pop removes the last element and returns it. The zero value is returned if
+// the deque is empty.
+func (dq *Deque[T]) Pop() T {
+	var zero T
+
+	if dq.read == dq.write {
+		return zero
+	}
+
+	dq.write--
+	slot := &dq.elems[dq.write&dq.mask]
+
+	// Clear the slot so that the buffer doesn't hold on to the element.
+	tail := *slot
+	*slot = zero
+	return tail
+}
+
+// Grow makes sure that another n elements fit into the deque, allocating a
+// bigger buffer if the current one is too small.
+//
+// It panics if the number of elements overflows.
+func (dq *Deque[T]) Grow(n int) {
+	used := dq.write - dq.read
+	required := used + uint64(n)
+	if required < used {
+		panic("overflow")
+	}
+	if required <= uint64(len(dq.elems)) {
+		return
+	}
+
+	// The new size is the smallest power of two which fits required, but
+	// never less than 8.
+	// See https://jameshfisher.com/2018/03/30/round-up-power-2/
+	shift := 64 - bits.LeadingZeros64(required-1)
+	size := 1 << shift
+	if size < 8 {
+		size = 8
+	}
+
+	// Copy the elements to the start of the new buffer: first the part from
+	// the read position up to the end of the old buffer, then the part which
+	// wrapped around to its beginning.
+	grown := make([]T, used, size)
+	start := dq.read & dq.mask
+	moved := copy(grown, dq.elems[start:])
+	copy(grown[moved:], dq.elems[:start])
+
+	dq.elems = grown[:size]
+	dq.mask = uint64(size) - 1
+	dq.read, dq.write = 0, used
+}
@@ -0,0 +1,82 @@
+package internal
+
+import "testing"
+
+// TestDeque exercises Pop, Shift, Push across a reallocation, and Grow.
+func TestDeque(t *testing.T) {
+	t.Run("pop", func(t *testing.T) {
+		var dq Deque[int]
+		dq.Push(1)
+		dq.Push(2)
+
+		if dq.Pop() != 2 {
+			t.Error("Didn't pop 2 first")
+		}
+
+		if dq.Pop() != 1 {
+			t.Error("Didn't pop 1 second")
+		}
+
+		if dq.Pop() != 0 {
+			t.Error("Didn't pop zero")
+		}
+	})
+
+	t.Run("shift", func(t *testing.T) {
+		var td Deque[int]
+		td.Push(1)
+		td.Push(2)
+
+		if td.Shift() != 1 {
+			t.Error("Didn't shift 1 first")
+		}
+
+		if td.Shift() != 2 {
+			t.Error("Didn't shift 2 second")
+		}
+
+		if td.Shift() != 0 {
+			t.Error("Didn't shift zero")
+		}
+	})
+
+	t.Run("push", func(t *testing.T) {
+		var td Deque[int]
+		td.Push(1)
+		td.Push(2)
+		td.Shift()
+
+		// Push enough elements to force the buffer to be reallocated while
+		// the read position is not at the start.
+		for i := 1; i <= 12; i++ {
+			td.Push(i)
+		}
+
+		if td.Shift() != 2 {
+			t.Error("Didn't shift 2 first")
+		}
+		for i := 1; i <= 12; i++ {
+			if v := td.Shift(); v != i {
+				t.Fatalf("Shifted %d at pos %d", v, i)
+			}
+		}
+	})
+
+	t.Run("grow", func(t *testing.T) {
+		var td Deque[int]
+		td.Push(1)
+		td.Push(2)
+		td.Push(3)
+		td.Shift()
+
+		// Two elements remain, so room for seven more needs nine slots.
+		td.Grow(7)
+		if len(td.elems) < 9 {
+			t.Fatal("Expected at least 9 elements, got", len(td.elems))
+		}
+
+		if cap(td.elems)&(cap(td.elems)-1) != 0 {
+			t.Fatalf("Capacity %d is not a power of two", cap(td.elems))
+		}
+
+		if td.Shift() != 2 || td.Shift() != 3 {
+			t.Fatal("Elements don't match after grow")
+		}
+	})
+}
@@ -0,0 +1,102 @@
+package internal
+
+import (
+	"debug/elf"
+	"fmt"
+	"io"
+)
+
+// SafeELFFile wraps an elf.File and turns panics raised by debug/elf into
+// errors.
+type SafeELFFile struct {
+	*elf.File
+}
+
+// NewSafeELFFile reads an ELF safely.
+//
+// Any panic during parsing is turned into an error. This is necessary since
+// there are a bunch of unfixed bugs in debug/elf.
+//
+// https://github.com/golang/go/issues?q=is%3Aissue+is%3Aopen+debug%2Felf+in%3Atitle
+func NewSafeELFFile(r io.ReaderAt) (safe *SafeELFFile, err error) {
+	defer func() {
+		rec := recover()
+		if rec == nil {
+			return
+		}
+
+		// %v rather than %s, since a panic value can be of any type.
+		safe = nil
+		err = fmt.Errorf("reading ELF file panicked: %v", rec)
+	}()
+
+	file, err := elf.NewFile(r)
+	if err != nil {
+		return nil, err
+	}
+
+	return &SafeELFFile{file}, nil
+}
+
+// OpenSafeELFFile reads an ELF from a file.
+//
+// It works like NewSafeELFFile, with the exception that safe.Close will
+// close the underlying file.
+func OpenSafeELFFile(path string) (safe *SafeELFFile, err error) {
+	defer func() {
+		rec := recover()
+		if rec == nil {
+			return
+		}
+
+		safe = nil
+		err = fmt.Errorf("reading ELF file panicked: %v", rec)
+	}()
+
+	file, err := elf.Open(path)
+	if err != nil {
+		return nil, err
+	}
+
+	return &SafeELFFile{file}, nil
+}
+
+// Symbols is the safe version of elf.File.Symbols.
+//
+// A panic is turned into an error, in which case syms is nil.
+func (se *SafeELFFile) Symbols() (syms []elf.Symbol, err error) {
+	defer func() {
+		rec := recover()
+		if rec == nil {
+			return
+		}
+
+		syms = nil
+		err = fmt.Errorf("reading ELF symbols panicked: %v", rec)
+	}()
+
+	syms, err = se.File.Symbols()
+	return
+}
+
+// DynamicSymbols is the safe version of elf.File.DynamicSymbols.
+//
+// A panic is turned into an error, in which case syms is nil.
+func (se *SafeELFFile) DynamicSymbols() (syms []elf.Symbol, err error) {
+	defer func() {
+		rec := recover()
+		if rec == nil {
+			return
+		}
+
+		syms = nil
+		err = fmt.Errorf("reading ELF dynamic symbols panicked: %v", rec)
+	}()
+
+	syms, err = se.File.DynamicSymbols()
+	return
+}
+
+// SectionsByType returns all sections in the file with the specified section type.
+//
+// The result is an empty, non-nil slice if no section matches.
+func (se *SafeELFFile) SectionsByType(typ elf.SectionType) []*elf.Section {
+	sections := make([]*elf.Section, 0, 1)
+	for _, section := range se.Sections {
+		if section.Type == typ {
+			sections = append(sections, section)
+		}
+	}
+	return sections
+}
@@ -0,0 +1,12 @@
+//go:build armbe || arm64be || mips || mips64 || mips64p32 || ppc64 || s390 || s390x || sparc || sparc64
+
+package internal
+
+import "encoding/binary"
+
+// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
+// depending on the host's endianness. This file is only built for the
+// architectures named in its build constraint, for which it is
+// binary.BigEndian.
+var NativeEndian binary.ByteOrder = binary.BigEndian
+
+// ClangEndian is set to either "el" or "eb" depending on the host's endianness.
+// It is "eb" for the architectures this file is built for.
+const ClangEndian = "eb"
@@ -0,0 +1,12 @@
+//go:build 386 || amd64 || amd64p32 || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || ppc64le || riscv64
+
+package internal
+
+import "encoding/binary"
+
+// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
+// depending on the host's endianness. This file is only built for the
+// architectures named in its build constraint, for which it is
+// binary.LittleEndian.
+var NativeEndian binary.ByteOrder = binary.LittleEndian
+
+// ClangEndian is set to either "el" or "eb" depending on the host's endianness.
+// It is "el" for the architectures this file is built for.
+const ClangEndian = "el"
@@ -0,0 +1,225 @@
+package epoll
+
+import (
+	"fmt"
+	"math"
+	"os"
+	"runtime"
+	"sync"
+	"time"
+
+	"github.com/cilium/ebpf/internal"
+	"github.com/cilium/ebpf/internal/unix"
+)
+
+// Poller waits for readiness notifications from multiple file descriptors.
+//
+// The wait can be interrupted by calling Close.
+type Poller struct {
+	// mutexes protect the fields declared below them. If you need to
+	// acquire both at once you must lock epollMu before eventMu.
+	epollMu sync.Mutex
+	// epollFd is the epoll instance. Close sets it to -1.
+	epollFd int
+
+	eventMu sync.Mutex
+	// event is the eventfd used to wake up a blocked Wait. Close sets it
+	// to nil.
+	event *eventFd
+}
+
+// New creates a Poller.
+//
+// It creates an epoll instance and an eventfd, and registers the eventfd with
+// the epoll instance using id 0 so that Close can interrupt Wait. A finalizer
+// calling Close is installed on the returned Poller.
+func New() (*Poller, error) {
+	fd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
+	if err != nil {
+		return nil, fmt.Errorf("create epoll fd: %v", err)
+	}
+
+	wake, err := newEventFd()
+	if err != nil {
+		unix.Close(fd)
+		return nil, err
+	}
+
+	poller := &Poller{epollFd: fd, event: wake}
+	if err := poller.Add(wake.raw, 0); err != nil {
+		// Release both descriptors, nothing else references them yet.
+		unix.Close(fd)
+		wake.close()
+		return nil, fmt.Errorf("add eventfd: %w", err)
+	}
+
+	runtime.SetFinalizer(poller, (*Poller).Close)
+	return poller, nil
+}
+
+// Close the poller.
+//
+// Interrupts any calls to Wait. Multiple calls to Close are valid, but subsequent
+// calls will return os.ErrClosed.
+func (p *Poller) Close() error {
+	// Remove the finalizer installed by New, Close is being called explicitly.
+	runtime.SetFinalizer(p, nil)
+
+	// Interrupt Wait() via the event fd if it's currently blocked.
+	// Once the poller has been closed p.event is nil, wakeWait then returns
+	// an error wrapping os.ErrClosed and we return early.
+	if err := p.wakeWait(); err != nil {
+		return err
+	}
+
+	// Acquire the lock. This ensures that Wait isn't running.
+	p.epollMu.Lock()
+	defer p.epollMu.Unlock()
+
+	// Prevent other calls to Close().
+	// epollMu is already held at this point, which matches the lock order
+	// documented on Poller.
+	p.eventMu.Lock()
+	defer p.eventMu.Unlock()
+
+	// Errors from closing the descriptors are not reported to the caller.
+	if p.epollFd != -1 {
+		unix.Close(p.epollFd)
+		p.epollFd = -1
+	}
+
+	if p.event != nil {
+		p.event.close()
+		p.event = nil
+	}
+
+	return nil
+}
+
+// Add an fd to the poller.
+//
+// id is returned by Wait in the unix.EpollEvent.Pad field and may be zero. It
+// must fit into an int32, that is it must not exceed math.MaxInt32 or be
+// smaller than math.MinInt32.
+//
+// Returns an error wrapping os.ErrClosed if the poller has been closed.
+//
+// Add is blocked by Wait.
+func (p *Poller) Add(fd int, id int) error {
+	// id is stored in an int32 field below. Reject anything that would be
+	// silently truncated by the conversion, in both directions.
+	if int64(id) > math.MaxInt32 || int64(id) < math.MinInt32 {
+		return fmt.Errorf("unsupported id: %d", id)
+	}
+
+	p.epollMu.Lock()
+	defer p.epollMu.Unlock()
+
+	if p.epollFd == -1 {
+		return fmt.Errorf("epoll add: %w", os.ErrClosed)
+	}
+
+	// The representation of EpollEvent isn't entirely accurate.
+	// Pad is fully useable, not just padding. Hence we stuff the
+	// id in there, which allows us to identify the event later (e.g.,
+	// in case of perf events, which CPU sent it).
+	event := unix.EpollEvent{
+		Events: unix.EPOLLIN,
+		Fd:     int32(fd),
+		Pad:    int32(id),
+	}
+
+	if err := unix.EpollCtl(p.epollFd, unix.EPOLL_CTL_ADD, fd, &event); err != nil {
+		return fmt.Errorf("add fd to epoll: %v", err)
+	}
+
+	return nil
+}
+
+// Wait for events.
+//
+// events receives the ready events. A zero deadline blocks until an event
+// arrives or the poller is closed.
+//
+// Returns the number of pending events or an error wrapping os.ErrClosed if
+// Close is called, or os.ErrDeadlineExceeded if EpollWait timeout.
+func (p *Poller) Wait(events []unix.EpollEvent, deadline time.Time) (int, error) {
+	p.epollMu.Lock()
+	defer p.epollMu.Unlock()
+
+	if p.epollFd == -1 {
+		return 0, fmt.Errorf("epoll wait: %w", os.ErrClosed)
+	}
+
+	for {
+		timeout := int(-1)
+		// clamped records that the timeout handed to the kernel is shorter
+		// than the time remaining until deadline.
+		clamped := false
+		if !deadline.IsZero() {
+			msec := time.Until(deadline).Milliseconds()
+			if msec < 0 {
+				// Deadline is in the past.
+				msec = 0
+			} else if msec > math.MaxInt32 {
+				// Deadline is too far in the future. epoll_wait(2) takes
+				// the timeout as a C int, so anything larger than
+				// MaxInt32 would be truncated by the kernel and could
+				// end up as zero or negative.
+				msec = math.MaxInt32
+				clamped = true
+			}
+			timeout = int(msec)
+		}
+
+		n, err := unix.EpollWait(p.epollFd, events, timeout)
+		if temp, ok := err.(temporaryError); ok && temp.Temporary() {
+			// Retry the syscall if we were interrupted, see https://github.com/golang/go/issues/20400
+			continue
+		}
+
+		if err != nil {
+			return 0, err
+		}
+
+		if n == 0 {
+			if clamped {
+				// Only the shortened timeout expired, the deadline
+				// itself hasn't been reached yet. Wait again with a
+				// freshly computed timeout.
+				continue
+			}
+			return 0, fmt.Errorf("epoll wait: %w", os.ErrDeadlineExceeded)
+		}
+
+		for _, event := range events[:n] {
+			if int(event.Fd) == p.event.raw {
+				// Since we don't read p.event the event is never cleared and
+				// we'll keep getting this wakeup until Close() acquires the
+				// lock and sets p.epollFd = -1.
+				return 0, fmt.Errorf("epoll wait: %w", os.ErrClosed)
+			}
+		}
+
+		return n, nil
+	}
+}
+
+// temporaryError is implemented by errors which can report whether they are
+// temporary. Wait retries EpollWait when the returned error implements this
+// interface and Temporary returns true.
+type temporaryError interface {
+	Temporary() bool
+}
+
+// wakeWait interrupts a Wait call that is blocked in epoll_wait by adding to
+// the poller's eventfd.
+//
+// Returns an error wrapping os.ErrClosed if the eventfd has already been
+// released by Close.
+func (p *Poller) wakeWait() error {
+	p.eventMu.Lock()
+	defer p.eventMu.Unlock()
+
+	if wake := p.event; wake != nil {
+		return wake.add(1)
+	}
+
+	return fmt.Errorf("epoll wake: %w", os.ErrClosed)
+}
+
+// eventFd wraps a Linux eventfd.
+//
+// An eventfd acts like a counter: writes add to the counter, reads retrieve
+// the counter and reset it to zero. Reads also block if the counter is zero.
+//
+// See man 2 eventfd.
+type eventFd struct {
+	// file wraps the descriptor and is used for reading, writing and closing.
+	file *os.File
+	// prefer raw over file.Fd(), since the latter puts the file into blocking
+	// mode.
+	raw int
+}
+
+// newEventFd creates an eventfd with an initial counter of zero. The
+// descriptor is created with the O_CLOEXEC and O_NONBLOCK flags.
+func newEventFd() (*eventFd, error) {
+	raw, err := unix.Eventfd(0, unix.O_CLOEXEC|unix.O_NONBLOCK)
+	if err != nil {
+		return nil, err
+	}
+
+	return &eventFd{
+		file: os.NewFile(uintptr(raw), "event"),
+		raw:  raw,
+	}, nil
+}
+
+// close closes the file wrapping the eventfd and returns the resulting error.
+func (efd *eventFd) close() error {
+	return efd.file.Close()
+}
+
+// add writes n to the eventfd, which adds n to its counter.
+//
+// The value is encoded as 8 bytes in native byte order, see man 2 eventfd.
+func (efd *eventFd) add(n uint64) error {
+	var buf [8]byte
+	// Encode the requested increment. Previously the literal 1 was written
+	// regardless of n.
+	internal.NativeEndian.PutUint64(buf[:], n)
+	_, err := efd.file.Write(buf[:])
+	return err
+}
+
+// read reads 8 bytes from the eventfd and decodes them as a counter value in
+// native byte order.
+//
+// The decoded value is returned together with any error from the read.
+func (efd *eventFd) read() (uint64, error) {
+	var scratch [8]byte
+	_, readErr := efd.file.Read(scratch[:])
+	counter := internal.NativeEndian.Uint64(scratch[:])
+	return counter, readErr
+}
@@ -0,0 +1,130 @@
+package epoll
+
+import (
+	"errors"
+	"math"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/cilium/ebpf/internal/unix"
+)
+
+// TestPoller checks that Wait reports a readable fd together with its id,
+// blocks while nothing is pending, and is interrupted by Close.
+func TestPoller(t *testing.T) {
+	t.Parallel()
+
+	event, poller := mustNewPoller(t)
+
+	// Buffered so that the last read goroutine can finish even if nobody
+	// receives from the channel anymore.
+	done := make(chan struct{}, 1)
+	read := func() {
+		defer func() {
+			done <- struct{}{}
+		}()
+
+		events := make([]unix.EpollEvent, 1)
+
+		n, err := poller.Wait(events, time.Time{})
+		if errors.Is(err, os.ErrClosed) {
+			return
+		}
+
+		if err != nil {
+			t.Error("Error from wait:", err)
+			return
+		}
+
+		if n != 1 {
+			t.Errorf("Got %d instead of 1 events", n)
+		}
+
+		// mustNewPoller registers the eventfd with id 42.
+		if e := events[0]; e.Pad != 42 {
+			t.Errorf("Incorrect value in EpollEvent.Pad: %d != 42", e.Pad)
+		}
+	}
+
+	// Make the eventfd readable, Wait must return promptly.
+	if err := event.add(1); err != nil {
+		t.Fatal(err)
+	}
+
+	go read()
+	select {
+	case <-done:
+	case <-time.After(time.Second):
+		t.Fatal("Timed out")
+	}
+
+	// Drain the eventfd so that it isn't readable anymore.
+	if _, err := event.read(); err != nil {
+		t.Fatal(err)
+	}
+
+	// With nothing pending Wait is expected to block.
+	go read()
+	select {
+	case <-done:
+		t.Fatal("Wait doesn't block")
+	case <-time.After(time.Second):
+	}
+
+	if err := poller.Close(); err != nil {
+		t.Fatal("Close returns an error:", err)
+	}
+
+	// The goroutine started above must have been released by Close.
+	select {
+	case <-done:
+	case <-time.After(time.Second):
+		t.Fatal("Close doesn't unblock Wait")
+	}
+
+	if err := poller.Close(); !errors.Is(err, os.ErrClosed) {
+		t.Fatal("Closing a second time doesn't return ErrClosed:", err)
+	}
+}
+
+// TestPollerDeadline checks that Wait returns os.ErrDeadlineExceeded for a
+// deadline in the past, and that a Wait with a deadline far in the future is
+// interrupted by Close.
+func TestPollerDeadline(t *testing.T) {
+	t.Parallel()
+
+	_, poller := mustNewPoller(t)
+	events := make([]unix.EpollEvent, 1)
+
+	_, err := poller.Wait(events, time.Now().Add(-time.Second))
+	if !errors.Is(err, os.ErrDeadlineExceeded) {
+		t.Fatal("Expected os.ErrDeadlineExceeded on deadline in the past, got", err)
+	}
+
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+
+		// Use the largest possible duration as the deadline.
+		_, err := poller.Wait(events, time.Now().Add(math.MaxInt64))
+		if !errors.Is(err, os.ErrClosed) {
+			t.Error("Expected os.ErrClosed when interrupting deadline, got", err)
+		}
+	}()
+
+	// Wait for the goroutine to enter the syscall.
+	time.Sleep(time.Second)
+
+	// The error from Close is not checked here.
+	poller.Close()
+	<-done
+}
+
+// mustNewPoller creates an eventfd and a Poller, and registers the eventfd
+// with the poller using id 42.
+//
+// Both are released via t.Cleanup. The test fails immediately if any step
+// returns an error.
+func mustNewPoller(t *testing.T) (*eventFd, *Poller) {
+	t.Helper()
+
+	efd, err := newEventFd()
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Cleanup(func() { efd.close() })
+
+	p, err := New()
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Cleanup(func() { p.Close() })
+
+	if addErr := p.Add(efd.raw, 42); addErr != nil {
+		t.Fatal("Can't add fd:", addErr)
+	}
+
+	return efd, p
+}
@@ -0,0 +1,198 @@
+package internal
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"strings"
+)
+
+// ErrorWithLog builds a VerifierError from err and a raw verifier log buffer.
+//
+// source is used as the prefix of the error string. log is cut at the first
+// NUL byte, stripped of surrounding whitespace and split into lines. If
+// nothing remains, the returned error has a nil Log.
+//
+// The default error output is a summary of the full log. The latter can be
+// accessed via VerifierError.Log or by formatting the error, see Format.
+func ErrorWithLog(source string, err error, log []byte, truncated bool) *VerifierError {
+	const whitespace = "\t\r\v\n "
+
+	verr := &VerifierError{source: source, Cause: err, Truncated: truncated}
+
+	// The log is a C string: everything from the first 0 byte onwards is
+	// not part of it.
+	log, _, _ = bytes.Cut(log, []byte{0})
+
+	log = bytes.Trim(log, whitespace)
+	if len(log) == 0 {
+		return verr
+	}
+
+	raw := bytes.Split(log, []byte{'\n'})
+	verr.Log = make([]string, len(raw))
+	for i := range raw {
+		// Leading white space of a line is kept on purpose, it is relied
+		// upon when outputting logs.
+		verr.Log[i] = string(bytes.TrimRight(raw[i], whitespace))
+	}
+
+	return verr
+}
+
+// VerifierError includes information from the eBPF verifier.
+//
+// It summarises the log output, see Format if you want to output the full contents.
+type VerifierError struct {
+	// source is used as the prefix of the error string.
+	source string
+	// The error which caused this error.
+	Cause error
+	// The verifier output split into lines.
+	Log []string
+	// Whether the log output is truncated, based on several heuristics.
+	Truncated bool
+}
+
+// Unwrap returns the error which caused the verifier error.
+func (le *VerifierError) Unwrap() error {
+	return le.Cause
+}
+
+// Error returns a one-line summary of the verifier error.
+//
+// The summary starts with "<source>: <cause>". It is followed by the last line
+// of the log, or by the last two lines if includePreviousLine asks for more
+// context or the log is truncated. A final log line starting with "processed "
+// is never part of the summary. If no log lines remain only the prefix is
+// returned. Otherwise a note in parentheses is appended if the log is
+// truncated or lines have been left out.
+func (le *VerifierError) Error() string {
+	log := le.Log
+	if n := len(log); n > 0 && strings.HasPrefix(log[n-1], "processed ") {
+		// Get rid of "processed 39 insns (limit 1000000) ..." from summary.
+		log = log[:n-1]
+	}
+
+	var b strings.Builder
+	fmt.Fprintf(&b, "%s: %s", le.source, le.Cause.Error())
+
+	n := len(log)
+	if n == 0 {
+		return b.String()
+	}
+
+	lines := log[n-1:]
+	if n >= 2 && (includePreviousLine(log[n-1]) || le.Truncated) {
+		// Add one more line of context if it aids understanding the error.
+		lines = log[n-2:]
+	}
+
+	for _, line := range lines {
+		b.WriteString(": ")
+		b.WriteString(strings.TrimSpace(line))
+	}
+
+	// The count is based on the full log, so a removed "processed " line
+	// is counted as omitted as well.
+	omitted := len(le.Log) - len(lines)
+	if omitted == 0 && !le.Truncated {
+		return b.String()
+	}
+
+	b.WriteString(" (")
+	if le.Truncated {
+		b.WriteString("truncated")
+	}
+
+	if omitted > 0 {
+		if le.Truncated {
+			b.WriteString(", ")
+		}
+		fmt.Fprintf(&b, "%d line(s) omitted", omitted)
+	}
+	b.WriteString(")")
+
+	return b.String()
+}
+
+// includePreviousLine reports whether line is likely easier to understand
+// when the line preceding it in the log is shown as well.
+//
+// Only cheap prefix checks are used. This is a deliberate trade off between
+// understandable error messages and complexity, regular expressions are
+// probably overkill here.
+func includePreviousLine(line string) bool {
+	switch {
+	case strings.HasPrefix(line, "\t"):
+		// [13] STRUCT drm_rect size=16 vlen=4
+		// \tx1 type_id=2
+		return true
+
+	case len(line) >= 2 && line[0] == 'R' && '0' <= line[1] && line[1] <= '9':
+		// 0: (95) exit
+		// R0 !read_ok
+		return true
+
+	case strings.HasPrefix(line, "invalid bpf_context access"):
+		// 0: (79) r6 = *(u64 *)(r1 +0)
+		// func '__x64_sys_recvfrom' arg0 type FWD is not a struct
+		// invalid bpf_context access off=0 size=8
+		return true
+
+	default:
+		return false
+	}
+}
+
+// Format the error.
+//
+// Understood verbs are %s and %v, which are equivalent to calling Error(). %v
+// allows outputting additional information using the following flags:
+//
+//	%+<width>v: Output the first <width> lines, or all lines if no width is given.
+//	%-<width>v: Output the last <width> lines, or all lines if no width is given.
+//
+// Use width to specify how many lines to output. Use the '-' flag to output
+// lines from the end of the log instead of the beginning.
+//
+// A width without a flag, both flags at once, or any other verb produce a
+// placeholder of the form %!v(...) instead of the error.
+func (le *VerifierError) Format(f fmt.State, verb rune) {
+	switch verb {
+	case 's':
+		_, _ = io.WriteString(f, le.Error())
+
+	case 'v':
+		// n is the number of log lines to output, at most all of them.
+		n, haveWidth := f.Width()
+		if !haveWidth || n > len(le.Log) {
+			n = len(le.Log)
+		}
+
+		if !f.Flag('+') && !f.Flag('-') {
+			// A width only makes sense together with one of the flags.
+			if haveWidth {
+				_, _ = io.WriteString(f, "%!v(BADWIDTH)")
+				return
+			}
+
+			_, _ = io.WriteString(f, le.Error())
+			return
+		}
+
+		if f.Flag('+') && f.Flag('-') {
+			_, _ = io.WriteString(f, "%!v(BADFLAG)")
+			return
+		}
+
+		fmt.Fprintf(f, "%s: %s:", le.source, le.Cause.Error())
+
+		omitted := len(le.Log) - n
+		lines := le.Log[:n]
+		if f.Flag('-') {
+			// Print last instead of first lines.
+			// The note about omitted lines goes before the lines in
+			// this case.
+			lines = le.Log[len(le.Log)-n:]
+			if omitted > 0 {
+				fmt.Fprintf(f, "\n\t(%d line(s) omitted)", omitted)
+			}
+		}
+
+		for _, line := range lines {
+			fmt.Fprintf(f, "\n\t%s", line)
+		}
+
+		// When printing the first lines the note goes after them.
+		if !f.Flag('-') {
+			if omitted > 0 {
+				fmt.Fprintf(f, "\n\t(%d line(s) omitted)", omitted)
+			}
+		}
+
+		if le.Truncated {
+			fmt.Fprintf(f, "\n\t(truncated)")
+		}
+
+	default:
+		fmt.Fprintf(f, "%%!%c(BADVERB)", verb)
+	}
+}
@@ -0,0 +1,87 @@
+package internal
+
+import (
+	"errors"
+	"os"
+	"testing"
+
+	"github.com/cilium/ebpf/internal/unix"
+	qt "github.com/frankban/quicktest"
+)
+
+// TestVerifierErrorWhitespace checks that trailing whitespace and NUL bytes
+// are removed from the log, and that a log without content doesn't add
+// anything to the error string.
+func TestVerifierErrorWhitespace(t *testing.T) {
+	b := []byte("unreachable insn 28")
+	b = append(b,
+		0xa,  // \n
+		0xd,  // \r
+		0x9,  // \t
+		0x20, // space
+		0, 0, // trailing NUL bytes
+	)
+
+	err := ErrorWithLog("frob", errors.New("test"), b, false)
+	qt.Assert(t, err.Error(), qt.Equals, "frob: test: unreachable insn 28")
+
+	// Logs which are empty once NUL bytes and whitespace are removed.
+	for _, log := range [][]byte{
+		nil,
+		[]byte("\x00"),
+		[]byte(" "),
+	} {
+		err = ErrorWithLog("frob", errors.New("test"), log, false)
+		qt.Assert(t, err.Error(), qt.Equals, "frob: test", qt.Commentf("empty log %q has incorrect format", log))
+	}
+}
+
+// TestVerifierErrorWrapping checks that the error returned by ErrorWithLog
+// wraps the provided error, carries the truncated flag and includes the log
+// in its error string.
+func TestVerifierErrorWrapping(t *testing.T) {
+	// No log, not truncated.
+	ve := ErrorWithLog("frob", unix.ENOENT, nil, false)
+	qt.Assert(t, ve, qt.ErrorIs, unix.ENOENT, qt.Commentf("should wrap provided error"))
+	qt.Assert(t, ve.Truncated, qt.IsFalse, qt.Commentf("verifier log should not be marked as truncated"))
+
+	// No log, truncated.
+	ve = ErrorWithLog("frob", unix.EINVAL, nil, true)
+	qt.Assert(t, ve, qt.ErrorIs, unix.EINVAL, qt.Commentf("should wrap provided error"))
+	qt.Assert(t, ve.Truncated, qt.IsTrue, qt.Commentf("verifier log should be marked as truncated"))
+
+	// Log, not truncated.
+	ve = ErrorWithLog("frob", unix.EINVAL, []byte("foo"), false)
+	qt.Assert(t, ve, qt.ErrorIs, unix.EINVAL, qt.Commentf("should wrap provided error"))
+	qt.Assert(t, ve.Error(), qt.Contains, "foo", qt.Commentf("verifier log should appear in error string"))
+
+	// Log, truncated.
+	ve = ErrorWithLog("frob", unix.ENOSPC, []byte("foo"), true)
+	qt.Assert(t, ve, qt.ErrorIs, unix.ENOSPC, qt.Commentf("should wrap provided error"))
+	qt.Assert(t, ve.Error(), qt.Contains, "foo", qt.Commentf("verifier log should appear in error string"))
+	qt.Assert(t, ve.Truncated, qt.IsTrue, qt.Commentf("verifier log should be marked truncated"))
+}
+
+// TestVerifierErrorSummary checks which log lines end up in the summary
+// returned by VerifierError.Error, using logs stored in testdata.
+func TestVerifierErrorSummary(t *testing.T) {
+	// Suppress the last line containing 'processed ... insns'.
+	errno524 := readErrorFromFile(t, "testdata/errno524.log")
+	qt.Assert(t, errno524.Error(), qt.Contains, "JIT doesn't support bpf-to-bpf calls")
+	qt.Assert(t, errno524.Error(), qt.Not(qt.Contains), "processed 39 insns")
+
+	// Include the previous line if the current one starts with a tab.
+	invalidMember := readErrorFromFile(t, "testdata/invalid-member.log")
+	qt.Assert(t, invalidMember.Error(), qt.Contains, "STRUCT task_struct size=7744 vlen=218: cpus_mask type_id=109 bitfield_size=0 bits_offset=7744 Invalid member")
+
+	// Only include the last line.
+	issue43 := readErrorFromFile(t, "testdata/issue-43.log")
+	qt.Assert(t, issue43.Error(), qt.Contains, "[11] FUNC helper_func2 type_id=10 vlen != 0")
+	qt.Assert(t, issue43.Error(), qt.Not(qt.Contains), "[10] FUNC_PROTO (anon) return=3 args=(3 arg)")
+
+	// Include instruction that caused invalid register access.
+	invalidR0 := readErrorFromFile(t, "testdata/invalid-R0.log")
+	qt.Assert(t, invalidR0.Error(), qt.Contains, "0: (95) exit: R0 !read_ok")
+
+	// Include symbol that doesn't match context type.
+	invalidCtx := readErrorFromFile(t, "testdata/invalid-ctx-access.log")
+	qt.Assert(t, invalidCtx.Error(), qt.Contains, "func '__x64_sys_recvfrom' arg0 type FWD is not a struct: invalid bpf_context access off=0 size=8")
+}
+
+// readErrorFromFile reads file and passes its contents to ErrorWithLog as the
+// verifier log. The returned error has the source "file", wraps unix.EINVAL
+// and is not marked as truncated.
+//
+// The test fails immediately if the file can't be read.
+func readErrorFromFile(tb testing.TB, file string) *VerifierError {
+	tb.Helper()
+
+	contents, err := os.ReadFile(file)
+	if err != nil {
+		tb.Fatal("Read file:", err)
+	}
+
+	return ErrorWithLog("file", unix.EINVAL, contents, false)
+}
@@ -0,0 +1,184 @@
+package internal
+
+import (
+	"errors"
+	"fmt"
+	"sync"
+)
+
+// ErrNotSupported indicates that a feature is not supported by the current kernel.
+//
+// Use errors.Is to check for it: UnsupportedFeatureError reports itself as
+// ErrNotSupported via its Is method.
+var ErrNotSupported = errors.New("not supported")
+
+// UnsupportedFeatureError is returned by FeatureTest() functions.
+//
+// It matches ErrNotSupported when compared using errors.Is.
+type UnsupportedFeatureError struct {
+	// The minimum Linux mainline version required for this feature.
+	// Used for the error string, and for sanity checking during testing.
+	// It is left out of the error string if it is unspecified.
+	MinimumVersion Version
+
+	// The name of the feature that isn't supported.
+	Name string
+}
+
+// Error returns "<name> not supported". The minimum required version is
+// appended in parentheses unless it is unspecified.
+func (ufe *UnsupportedFeatureError) Error() string {
+	if v := ufe.MinimumVersion; !v.Unspecified() {
+		return fmt.Sprintf("%s not supported (requires >= %s)", ufe.Name, v)
+	}
+
+	return fmt.Sprintf("%s not supported", ufe.Name)
+}
+
+// Is indicates that UnsupportedFeatureError is ErrNotSupported.
+//
+// It returns true only if target is ErrNotSupported, which makes
+// errors.Is(err, ErrNotSupported) succeed for this error type.
+func (ufe *UnsupportedFeatureError) Is(target error) bool {
+	return target == ErrNotSupported
+}
+
+// FeatureTest caches the result of a [FeatureTestFn].
+//
+// Fields should not be modified after creation.
+type FeatureTest struct {
+	// The name of the feature being detected.
+	Name string
+	// Version in the form Major.Minor[.Patch].
+	// May be empty, in which case no minimum version is reported.
+	Version string
+	// The feature test itself.
+	Fn FeatureTestFn
+
+	// mu protects done and result.
+	mu sync.RWMutex
+	// done is true once a conclusive result has been cached.
+	done bool
+	// result is the cached outcome, only meaningful if done is true.
+	result error
+}
+
+// FeatureTestFn is used to determine whether the kernel supports
+// a certain feature.
+//
+// The return values have the following semantics:
+//
+//	err == ErrNotSupported: the feature is not available
+//	err == nil: the feature is available
+//	err != nil: the test couldn't be executed
+//
+// ErrNotSupported is detected using errors.Is, so an error wrapping it is
+// treated the same way.
+type FeatureTestFn func() error
+
+// NewFeatureTest wraps fn in a [FeatureTest] with the given name and version
+// and returns a function which executes the test.
+//
+// fn is not invoked until the returned function is called. See
+// [FeatureTestFn] for the meaning of the returned error.
+func NewFeatureTest(name, version string, fn FeatureTestFn) func() error {
+	return (&FeatureTest{Name: name, Version: version, Fn: fn}).execute
+}
+
+// execute the feature test.
+//
+// The result is cached if the test is conclusive.
+//
+// See [FeatureTestFn] for the meaning of the returned error.
+func (ft *FeatureTest) execute() error {
+	// Fast path: check for a cached result while only holding the read lock.
+	ft.mu.RLock()
+	result, done := ft.result, ft.done
+	ft.mu.RUnlock()
+
+	if done {
+		return result
+	}
+
+	// The write lock is held while Fn runs, so callers of the same test
+	// wait here instead of running Fn concurrently.
+	ft.mu.Lock()
+	defer ft.mu.Unlock()
+
+	// The test may have been executed by another caller while we were
+	// waiting to acquire ft.mu.
+	if ft.done {
+		return ft.result
+	}
+
+	err := ft.Fn()
+	if err == nil {
+		// The feature is available, ft.result stays nil.
+		ft.done = true
+		return nil
+	}
+
+	if errors.Is(err, ErrNotSupported) {
+		var v Version
+		if ft.Version != "" {
+			v, err = NewVersion(ft.Version)
+			if err != nil {
+				// An invalid Version string is returned without
+				// caching anything.
+				return fmt.Errorf("feature %s: %w", ft.Name, err)
+			}
+		}
+
+		// Cache the error value so that later calls return the same one.
+		ft.done = true
+		ft.result = &UnsupportedFeatureError{
+			MinimumVersion: v,
+			Name:           ft.Name,
+		}
+
+		return ft.result
+	}
+
+	// We couldn't execute the feature test to a point
+	// where it could make a determination.
+	// Don't cache the result, just return it.
+	return fmt.Errorf("detect support for %s: %w", ft.Name, err)
+}
+
+// FeatureMatrix groups multiple related feature tests into a map.
+//
+// Useful when there is a small number of discrete features which are known
+// at compile time.
+//
+// The map itself is not protected by a lock.
+// It must not be modified concurrently with calling [FeatureMatrix.Result].
+type FeatureMatrix[K comparable] map[K]*FeatureTest
+
+// Result executes the feature test stored under key and returns its outcome.
+//
+// An error is returned if the matrix doesn't contain a test for key.
+//
+// It's safe to call this function concurrently.
+func (fm FeatureMatrix[K]) Result(key K) error {
+	if ft, found := fm[key]; found {
+		return ft.execute()
+	}
+
+	return fmt.Errorf("no feature probe for %v", key)
+}
+
+// FeatureCache caches a potentially unlimited number of feature probes.
+//
+// Useful when there is a high cardinality for a feature test.
+type FeatureCache[K comparable] struct {
+	// mu protects features.
+	mu sync.RWMutex
+	// newTest creates the feature test for a key which isn't cached yet.
+	newTest func(K) *FeatureTest
+	// features holds the feature tests created so far.
+	features map[K]*FeatureTest
+}
+
+// NewFeatureCache creates an empty FeatureCache.
+//
+// newTest is invoked to create the feature test for a key the first time
+// that key is requested.
+func NewFeatureCache[K comparable](newTest func(K) *FeatureTest) *FeatureCache[K] {
+	fc := new(FeatureCache[K])
+	fc.newTest = newTest
+	fc.features = make(map[K]*FeatureTest)
+	return fc
+}
+
+// Result returns the outcome of the feature test for the given key. The test
+// is created using newTest if the key hasn't been requested before.
+func (fc *FeatureCache[K]) Result(key K) error {
+	// NB: Executing the feature test happens without fc.mu taken.
+	return fc.retrieve(key).execute()
+}
+
+// retrieve returns the feature test cached for key, creating and storing it
+// via newTest if there is none yet.
+func (fc *FeatureCache[K]) retrieve(key K) *FeatureTest {
+	// Look up the key under the read lock first.
+	fc.mu.RLock()
+	cached := fc.features[key]
+	fc.mu.RUnlock()
+
+	if cached != nil {
+		return cached
+	}
+
+	fc.mu.Lock()
+	defer fc.mu.Unlock()
+
+	// Another caller may have stored a test between dropping the read lock
+	// and acquiring the write lock.
+	if existing := fc.features[key]; existing != nil {
+		return existing
+	}
+
+	created := fc.newTest(key)
+	fc.features[key] = created
+	return created
+}
@@ -0,0 +1,71 @@
+package internal
+
+import (
+	"errors"
+	"strings"
+	"testing"
+
+	"github.com/cilium/ebpf/internal/testutils/fdtrace"
+)
+
+// TestMain hands control of the package's tests to fdtrace.TestMain.
+func TestMain(m *testing.M) {
+	fdtrace.TestMain(m)
+}
+
+// TestFeatureTest checks that a feature test is executed lazily, that
+// ErrNotSupported is turned into a cached *UnsupportedFeatureError, and that
+// other errors are not cached.
+func TestFeatureTest(t *testing.T) {
+	var called bool
+
+	fn := NewFeatureTest("foo", "1.0", func() error {
+		called = true
+		return nil
+	})
+
+	// Creating the test must not invoke the function.
+	if called {
+		t.Error("Function was called too early")
+	}
+
+	err := fn()
+	if !called {
+		t.Error("Function wasn't called")
+	}
+
+	if err != nil {
+		t.Error("Unexpected negative result:", err)
+	}
+
+	fn = NewFeatureTest("bar", "2.1.1", func() error {
+		return ErrNotSupported
+	})
+
+	err = fn()
+	if err == nil {
+		t.Fatal("Unexpected positive result")
+	}
+
+	fte, ok := err.(*UnsupportedFeatureError)
+	if !ok {
+		t.Fatal("Result is not a *UnsupportedFeatureError")
+	}
+
+	if !strings.Contains(fte.Error(), "2.1.1") {
+		t.Error("UnsupportedFeatureError.Error doesn't contain version")
+	}
+
+	if !errors.Is(err, ErrNotSupported) {
+		t.Error("UnsupportedFeatureError is not ErrNotSupported")
+	}
+
+	// A cached result is the identical error value.
+	err2 := fn()
+	if err != err2 {
+		t.Error("Didn't cache an error wrapping ErrNotSupported")
+	}
+
+	fn = NewFeatureTest("bar", "2.1.1", func() error {
+		return errors.New("foo")
+	})
+
+	// Inconclusive results are wrapped anew on each call, so the two
+	// values must differ.
+	err1, err2 := fn(), fn()
+	if err1 == err2 {
+		t.Error("Cached result of unsuccessful execution")
+	}
+}
@@ -0,0 +1,128 @@
+package internal
+
+import (
+	"bufio"
+	"bytes"
+	"compress/gzip"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"sync"
+)
+
+// NewBufferedSectionReader returns a buffered reader for the n bytes of ra
+// starting at offset off. It is a convenience function for reading
+// subsections of ELF sections while minimizing the amount of read() syscalls
+// made.
+//
+// Syscall overhead is non-negligible in continuous integration context
+// where ELFs might be accessed over virtual filesystems with poor random
+// access performance. Buffering reads makes sense because (sub)sections
+// end up being read completely anyway.
+//
+// Use instead of the r.Seek() + io.LimitReader() pattern.
+func NewBufferedSectionReader(ra io.ReaderAt, off, n int64) *bufio.Reader {
+	// Request at most one page of buffer to avoid slurping large parts of a
+	// file into memory, and less than that if the segment is smaller.
+	// bufio.NewReader would always use its hardcoded default of 4096.
+	size := int64(os.Getpagesize())
+	if n <= size {
+		size = n
+	}
+
+	section := io.NewSectionReader(ra, off, n)
+	return bufio.NewReaderSize(section, int(size))
+}
+
+// DiscardZeroes is a writer which accepts and discards zero bytes, and
+// rejects anything else.
+type DiscardZeroes struct{}
+
+// Write returns len(p) if p consists only of zero bytes. Otherwise it
+// returns 0 and an error.
+func (DiscardZeroes) Write(p []byte) (int, error) {
+	for i := range p {
+		if p[i] == 0 {
+			continue
+		}
+		return 0, errors.New("encountered non-zero byte")
+	}
+	return len(p), nil
+}
+
+// ReadAllCompressed opens the gzip compressed file and returns its
+// decompressed contents.
+//
+// An error is returned if the file can't be opened, doesn't start with a
+// valid gzip header, or can't be read completely.
+func ReadAllCompressed(file string) ([]byte, error) {
+	compressed, err := os.Open(file)
+	if err != nil {
+		return nil, err
+	}
+	defer compressed.Close()
+
+	zr, err := gzip.NewReader(compressed)
+	if err != nil {
+		return nil, err
+	}
+	defer zr.Close()
+
+	return io.ReadAll(zr)
+}
+
+// ReadUint64FromFile parses a single uint64 from the contents of a file.
+//
+// format specifies the contents of the file in fmt.Scanf syntax. The elements
+// of path are joined to form the name of the file.
+func ReadUint64FromFile(format string, path ...string) (uint64, error) {
+	filename := filepath.Join(path...)
+
+	contents, err := os.ReadFile(filename)
+	if err != nil {
+		return 0, fmt.Errorf("reading file %q: %w", filename, err)
+	}
+
+	var value uint64
+	items, err := fmt.Fscanf(bytes.NewReader(contents), format, &value)
+	switch {
+	case err != nil:
+		return 0, fmt.Errorf("parsing file %q: %w", filename, err)
+	case items != 1:
+		return 0, fmt.Errorf("parsing file %q: expected 1 item, got %d", filename, items)
+	}
+
+	return value, nil
+}
+
+// uint64FromFileKey identifies a cached value by the format used to parse it
+// and the path of the file it was read from.
+type uint64FromFileKey struct {
+	format, path string
+}
+
+// uint64FromFileCache holds the values memoized by ReadUint64FromFileOnce.
+// The embedded lock protects values.
+var uint64FromFileCache = struct {
+	sync.RWMutex
+	values map[uint64FromFileKey]uint64
+}{
+	values: map[uint64FromFileKey]uint64{},
+}
+
+// ReadUint64FromFileOnce is like ReadUint64FromFile but memoizes the result.
+//
+// Values are cached per combination of format and joined path. Errors are
+// not cached.
+func ReadUint64FromFileOnce(format string, path ...string) (uint64, error) {
+	filename := filepath.Join(path...)
+	key := uint64FromFileKey{format: format, path: filename}
+
+	uint64FromFileCache.RLock()
+	cached, hit := uint64FromFileCache.values[key]
+	uint64FromFileCache.RUnlock()
+
+	if hit {
+		return cached, nil
+	}
+
+	// The file is read without holding the lock.
+	fresh, err := ReadUint64FromFile(format, filename)
+	if err != nil {
+		return 0, err
+	}
+
+	uint64FromFileCache.Lock()
+	defer uint64FromFileCache.Unlock()
+
+	if winner, raced := uint64FromFileCache.values[key]; raced {
+		// Someone else got here before us, use what is cached.
+		return winner, nil
+	}
+
+	uint64FromFileCache.values[key] = fresh
+	return fresh, nil
+}
@@ -0,0 +1,19 @@
+package internal
+
+import (
+	"bytes"
+	"io"
+	"testing"
+)
+
+// TestDiscardZero checks that DiscardZeroes accepts input consisting of
+// zeroes and rejects input containing a non-zero byte.
+func TestDiscardZero(t *testing.T) {
+	_, err := io.Copy(DiscardZeroes{}, bytes.NewReader([]byte{0, 0, 0}))
+	if err != nil {
+		t.Error("Returned an error even though input was zero:", err)
+	}
+
+	_, err = io.Copy(DiscardZeroes{}, bytes.NewReader([]byte{1}))
+	if err == nil {
+		t.Error("No error even though input is non-zero")
+	}
+}
@@ -0,0 +1,267 @@
+package kconfig
+
+import (
+	"bufio"
+	"bytes"
+	"compress/gzip"
+	"fmt"
+	"io"
+	"math"
+	"os"
+	"strconv"
+	"strings"
+
+	"github.com/cilium/ebpf/btf"
+	"github.com/cilium/ebpf/internal"
+)
+
+// Find finds a kconfig file on the host.
+//
+// It first tries /boot/config-<release> for the release of the currently
+// running kernel, and falls back to /proc/config.gz if that file can't be
+// opened. An error is returned if neither file can be opened.
+//
+// The caller is responsible for closing the returned file.
+func Find() (*os.File, error) {
+	release, err := internal.KernelRelease()
+	if err != nil {
+		return nil, fmt.Errorf("cannot get kernel release: %w", err)
+	}
+
+	bootConfig := "/boot/config-" + release
+	for _, candidate := range []string{bootConfig, "/proc/config.gz"} {
+		if f, err := os.Open(candidate); err == nil {
+			return f, nil
+		}
+	}
+
+	return nil, fmt.Errorf("neither %s nor /proc/config.gz provide a kconfig", bootConfig)
+}
+
+// Parse parses the kconfig file for which a reader is given.
+//
+// All the CONFIG_* which are in filter and which are set will be put in the
+// returned map as key with their corresponding value as map value.
+// If filter is nil, no filtering will occur.
+// If the kconfig file is not valid, an error will be returned.
+//
+// source may be gzip compressed, in which case it is decompressed
+// transparently.
+func Parse(source io.ReaderAt, filter map[string]struct{}) (map[string]string, error) {
+	var r io.Reader
+	zr, err := gzip.NewReader(io.NewSectionReader(source, 0, math.MaxInt64))
+	if err == nil {
+		// Source is gzip compressed, transparently decompress.
+		r = zr
+	} else {
+		// Not gzip, read source as is from the beginning.
+		r = io.NewSectionReader(source, 0, math.MaxInt64)
+	}
+
+	ret := make(map[string]string, len(filter))
+
+	s := bufio.NewScanner(r)
+	for s.Scan() {
+		if err := processKconfigLine(s.Bytes(), ret, filter); err != nil {
+			return nil, fmt.Errorf("cannot parse line: %w", err)
+		}
+
+		// Stop early once there is an entry for every key in filter.
+		if filter != nil && len(ret) == len(filter) {
+			break
+		}
+	}
+
+	if err := s.Err(); err != nil {
+		return nil, fmt.Errorf("cannot parse: %w", err)
+	}
+
+	if zr == nil {
+		return ret, nil
+	}
+
+	return ret, zr.Close()
+}
+
+// processKconfigLine parses a single line of a kconfig file and stores the
+// key and value it contains in m.
+//
+// Lines which don't start with "CONFIG_" are ignored. If filter is not nil,
+// keys which are not in filter are ignored as well. A line without '=' or
+// without a value is an error.
+//
+// Golang translation of libbpf bpf_object__process_kconfig_line():
+// https://github.com/libbpf/libbpf/blob/fbd60dbff51c870f5e80a17c4f2fd639eb80af90/src/libbpf.c#L1874
+// It does the same checks but does not put the data inside the BPF map.
+func processKconfigLine(line []byte, m map[string]string, filter map[string]struct{}) error {
+	// Ignore empty lines and "# CONFIG_* is not set".
+	if !bytes.HasPrefix(line, []byte("CONFIG_")) {
+		return nil
+	}
+
+	sep := bytes.IndexByte(line, '=')
+	if sep < 0 {
+		return fmt.Errorf("line %q does not contain separator '='", line)
+	}
+
+	key, value := line[:sep], line[sep+1:]
+	if len(value) == 0 {
+		return fmt.Errorf("line %q has no value", line)
+	}
+
+	// NB: map[string(key)] gets special optimisation help from the compiler
+	// and doesn't allocate. Don't turn this into a variable.
+	if filter != nil {
+		if _, wanted := filter[string(key)]; !wanted {
+			return nil
+		}
+	}
+
+	// This can seem odd, but libbpf only sets the value the first time the key is
+	// met:
+	// https://github.com/torvalds/linux/blob/0d85b27b0cc6/tools/lib/bpf/libbpf.c#L1906-L1908
+	if _, seen := m[string(key)]; seen {
+		return nil
+	}
+
+	m[string(key)] = string(value)
+	return nil
+}
+
+// PutValue translates value according to the BTF type typ and writes the
+// result to data.
+//
+// The values "y", "n" and "m" are treated as tri-state values, values
+// starting with a double quote as strings and everything else as numbers.
+func PutValue(data []byte, typ btf.Type, value string) error {
+	typ = btf.UnderlyingType(typ)
+
+	if value == "y" || value == "n" || value == "m" {
+		return putValueTri(data, typ, value)
+	}
+
+	if strings.HasPrefix(value, `"`) {
+		return putValueString(data, typ, value)
+	}
+
+	return putValueNumber(data, typ, value)
+}
+
+// triState is one of the three values a tri-state kconfig option can take.
+//
+// Golang translation of libbpf_tristate enum:
+// https://github.com/libbpf/libbpf/blob/fbd60dbff51c870f5e80a17c4f2fd639eb80af90/src/bpf_helpers.h#L169
+type triState int
+
+const (
+	// TriNo is written for the value "n".
+	TriNo triState = 0
+	// TriYes is written for the value "y".
+	TriYes triState = 1
+	// TriModule is written for the value "m".
+	TriModule triState = 2
+)
+
+// putValueTri writes the tri-state value ("y", "n" or "m") to data.
+//
+// typ must either be a one byte btf.Int with bool encoding, which only
+// accepts "y" and "n", or the enum libbpf_tristate. An error is returned for
+// any other type, for unsupported values and if data is too short to hold
+// the value.
+func putValueTri(data []byte, typ btf.Type, value string) error {
+	switch v := typ.(type) {
+	case *btf.Int:
+		if v.Encoding != btf.Bool {
+			return fmt.Errorf("cannot add tri value, expected btf.Bool, got: %v", v.Encoding)
+		}
+
+		if v.Size != 1 {
+			return fmt.Errorf("cannot add tri value, expected size of 1 byte, got: %d", v.Size)
+		}
+
+		// Don't panic on an empty destination.
+		if len(data) < 1 {
+			return fmt.Errorf("cannot add tri value, need 1 byte of space, got: %d", len(data))
+		}
+
+		switch value {
+		case "y":
+			data[0] = 1
+		case "n":
+			data[0] = 0
+		default:
+			return fmt.Errorf("cannot use %q for btf.Bool", value)
+		}
+	case *btf.Enum:
+		if v.Name != "libbpf_tristate" {
+			return fmt.Errorf("cannot use enum %q, only libbpf_tristate is supported", v.Name)
+		}
+
+		var tri triState
+		switch value {
+		case "y":
+			tri = TriYes
+		case "m":
+			tri = TriModule
+		case "n":
+			tri = TriNo
+		default:
+			return fmt.Errorf("value %q is not support for libbpf_tristate", value)
+		}
+
+		// NOTE(review): libbpf_tristate is presumably a 4 byte C enum, so
+		// the 8 byte write touches the 4 bytes following the value when
+		// data is longer than the enum. Confirm what callers pass as data
+		// before narrowing this further.
+		switch {
+		case len(data) >= 8:
+			// Unchanged behaviour for destinations with enough space.
+			internal.NativeEndian.PutUint64(data, uint64(tri))
+		case len(data) >= 4:
+			// PutUint64 would panic here, write a 4 byte value instead.
+			internal.NativeEndian.PutUint32(data, uint32(tri))
+		default:
+			return fmt.Errorf("cannot add tri value, need at least 4 bytes of space, got: %d", len(data))
+		}
+	default:
+		return fmt.Errorf("cannot add tri value, expected btf.Int or btf.Enum, got: %T", v)
+	}
+
+	return nil
+}
+
+// putValueString writes the contents of the double quoted string value to
+// data.
+//
+// typ must be an array of one byte integers which are not bools. The string
+// is cut off after array.Nelems bytes if it is longer than the array, and
+// copy never writes more than len(data) bytes.
+func putValueString(data []byte, typ btf.Type, value string) error {
+	array, ok := typ.(*btf.Array)
+	if !ok {
+		// Report the type that was passed in. array is nil at this point
+		// and would always be printed as *btf.Array.
+		return fmt.Errorf("cannot add string value, expected btf.Array, got %T", typ)
+	}
+
+	elem := btf.UnderlyingType(array.Type)
+	contentType, ok := elem.(*btf.Int)
+	if !ok {
+		// Same as above: report the actual element type.
+		return fmt.Errorf("cannot add string value, expected array of btf.Int, got %T", elem)
+	}
+
+	// Any Int, which is not bool, of one byte could be used to store char:
+	// https://github.com/torvalds/linux/blob/1a5304fecee5/tools/lib/bpf/libbpf.c#L3637-L3638
+	if contentType.Encoding == btf.Bool {
+		return fmt.Errorf("cannot add string value, expected array of btf.Int which is not btf.Bool")
+	}
+
+	if contentType.Size != 1 {
+		return fmt.Errorf("cannot add string value, expected array of btf.Int of size 1, got array of btf.Int of size: %v", contentType.Size)
+	}
+
+	if !strings.HasPrefix(value, `"`) || !strings.HasSuffix(value, `"`) {
+		return fmt.Errorf(`value %q must start and finish with '"'`, value)
+	}
+
+	str := strings.Trim(value, `"`)
+
+	// We need to trim string if the bpf array is smaller.
+	if uint32(len(str)) >= array.Nelems {
+		str = str[:array.Nelems]
+	}
+
+	// Write the string content to .kconfig.
+	copy(data, str)
+
+	return nil
+}
+
+// putValueNumber parses value as an integer and writes it to data in native
+// byte order.
+//
+// typ must be a btf.Int of size 1, 2, 4 or 8. The value is parsed as a signed
+// integer if the type is signed and as an unsigned integer otherwise. The
+// base is derived from the prefix of value, and the value must fit into the
+// size of the type. An error is returned if data is too short to hold the
+// value.
+func putValueNumber(data []byte, typ btf.Type, value string) error {
+	integer, ok := typ.(*btf.Int)
+	if !ok {
+		// Report the type that was passed in. integer is nil at this point
+		// and would always be printed as *btf.Int.
+		return fmt.Errorf("cannot add number value, expected *btf.Int, got: %T", typ)
+	}
+
+	size := integer.Size
+	sizeInBits := size * 8
+
+	var n uint64
+	var err error
+	if integer.Encoding == btf.Signed {
+		parsed, e := strconv.ParseInt(value, 0, int(sizeInBits))
+
+		n = uint64(parsed)
+		err = e
+	} else {
+		parsed, e := strconv.ParseUint(value, 0, int(sizeInBits))
+
+		n = uint64(parsed)
+		err = e
+	}
+
+	if err != nil {
+		return fmt.Errorf("cannot parse value: %w", err)
+	}
+
+	switch size {
+	case 1, 2, 4, 8:
+	default:
+		return fmt.Errorf("size (%d) is not valid, expected: 1, 2, 4 or 8", size)
+	}
+
+	// The writes below panic if data is shorter than size.
+	if len(data) < int(size) {
+		return fmt.Errorf("cannot add number value, need %d bytes of space, got: %d", size, len(data))
+	}
+
+	switch size {
+	case 1:
+		data[0] = byte(n)
+	case 2:
+		internal.NativeEndian.PutUint16(data, uint16(n))
+	case 4:
+		internal.NativeEndian.PutUint32(data, uint32(n))
+	case 8:
+		internal.NativeEndian.PutUint64(data, uint64(n))
+	}
+
+	return nil
+}
@@ -0,0 +1,418 @@
+package kconfig
+
+import (
+	"bytes"
+	"encoding/binary"
+	"os"
+	"testing"
+
+	"github.com/cilium/ebpf/btf"
+	"github.com/cilium/ebpf/internal"
+
+	qt "github.com/frankban/quicktest"
+)
+
+func BenchmarkParse(b *testing.B) {
+	f, err := os.Open("testdata/config-6.2.15-300.fc38.x86_64.gz")
+	if err != nil {
+		b.Fatal(err)
+	}
+	defer f.Close()
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	for n := 0; n < b.N; n++ {
+		_, err := Parse(f, nil)
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+func BenchmarkParseFiltered(b *testing.B) {
+	f, err := os.Open("testdata/config-6.2.15-300.fc38.x86_64.gz")
+	if err != nil {
+		b.Fatal(err)
+	}
+	defer f.Close()
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	// CONFIG_ARCH_USE_MEMTEST is the last CONFIG_ in the file.
+	// So, we will easily be able to see how many allocated bytes the filtering
+	// permits reducing compared to unfiltered benchmark.
+	filter := map[string]struct{}{"CONFIG_ARCH_USE_MEMTEST": {}}
+
+	for n := 0; n < b.N; n++ {
+		_, err := Parse(f, filter)
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+// TestParse checks that a plain-text kconfig is parsed into the expected
+// key/value pairs when no filter is given.
+func TestParse(t *testing.T) {
+	t.Parallel()
+
+	file, err := os.Open("testdata/test.kconfig")
+	if err != nil {
+		t.Fatal("Error reading /testdata/test.kconfig: ", err)
+	}
+	defer file.Close()
+
+	got, err := Parse(file, nil)
+	if err != nil {
+		t.Fatal("Error parsing kconfig: ", err)
+	}
+
+	// CONFIG_FOO is present twice in the fixture ("foo", then "bar"); the
+	// first value is the one expected here.
+	want := map[string]string{
+		"CONFIG_TRISTATE": "m",
+		"CONFIG_BOOL":     "y",
+		"CONFIG_CHAR":     "100",
+		"CONFIG_USHORT":   "30000",
+		"CONFIG_INT":      "123456",
+		"CONFIG_ULONG":    "0xDEADBEEFC0DE",
+		"CONFIG_STR":      `"abracad"`,
+		"CONFIG_FOO":      `"foo"`,
+	}
+	qt.Assert(t, got, qt.DeepEquals, want)
+}
+
+// TestParseFiltered checks that only keys present in the filter are returned
+// when parsing a plain-text kconfig.
+func TestParseFiltered(t *testing.T) {
+	t.Parallel()
+
+	file, err := os.Open("testdata/test.kconfig")
+	if err != nil {
+		t.Fatal("Error reading /testdata/test.kconfig: ", err)
+	}
+	defer file.Close()
+
+	got, err := Parse(file, map[string]struct{}{"CONFIG_FOO": {}})
+	if err != nil {
+		t.Fatal("Error parsing gzipped kconfig: ", err)
+	}
+
+	want := map[string]string{"CONFIG_FOO": `"foo"`}
+	qt.Assert(t, got, qt.DeepEquals, want)
+}
+
+// TestParseGzipped checks that a gzip-compressed kernel config parses without
+// an error.
+func TestParseGzipped(t *testing.T) {
+	t.Parallel()
+
+	file, err := os.Open("testdata/config-6.2.15-300.fc38.x86_64.gz")
+	if err != nil {
+		t.Fatal("Error reading /testdata/config-6.2.15-300.fc38.x86_64.gz: ", err)
+	}
+	defer file.Close()
+
+	if _, err := Parse(file, nil); err != nil {
+		t.Fatal("Error parsing gzipped kconfig: ", err)
+	}
+}
+
+// TestParseGzippedFiltered checks that filtering also works on a
+// gzip-compressed kernel config.
+func TestParseGzippedFiltered(t *testing.T) {
+	t.Parallel()
+
+	file, err := os.Open("testdata/config-6.2.15-300.fc38.x86_64.gz")
+	if err != nil {
+		t.Fatal("Error reading /testdata/config-6.2.15-300.fc38.x86_64.gz: ", err)
+	}
+	defer file.Close()
+
+	got, err := Parse(file, map[string]struct{}{"CONFIG_HZ": {}})
+	if err != nil {
+		t.Fatal("Error parsing gzipped kconfig: ", err)
+	}
+
+	want := map[string]string{"CONFIG_HZ": "1000"}
+	qt.Assert(t, got, qt.DeepEquals, want)
+}
+
+// TestProcessKconfigBadLine checks that malformed lines are rejected by
+// processKconfigLine.
+func TestProcessKconfigBadLine(t *testing.T) {
+	t.Parallel()
+
+	badLines := []struct {
+		line   string
+		reason string
+	}{
+		{"CONFIG_FOO", "line has no '='"},
+		{"CONFIG_FOO=", "line has no value"},
+	}
+
+	// All lines are processed against the same destination map.
+	m := make(map[string]string)
+	for _, bad := range badLines {
+		err := processKconfigLine([]byte(bad.line), m, nil)
+		qt.Assert(t, err, qt.IsNotNil, qt.Commentf("%s", bad.reason))
+	}
+}
+
+// TestPutValue runs PutValue against a table of BTF types and kconfig values.
+//
+// A case with a non-empty comment must make PutValue return an error, the
+// comment describes why. All other cases must succeed and produce the bytes
+// obtained by encoding expected in native byte order.
+func TestPutValue(t *testing.T) {
+	t.Parallel()
+
+	type testCase struct {
+		// typ is the BTF type of the destination.
+		typ btf.Type
+		// value is the kconfig value passed to PutValue.
+		value string
+		// expected is the value whose native endian encoding must end up in
+		// the buffer. Only used when comment is empty.
+		expected any
+		// comment marks a case which must fail and explains the reason.
+		comment string
+	}
+
+	cases := []testCase{
+		{
+			typ: &btf.Int{
+				Size:     1,
+				Encoding: btf.Bool,
+			},
+			value:    "n",
+			expected: int8(0),
+		},
+		{
+			typ: &btf.Int{
+				Size:     1,
+				Encoding: btf.Bool,
+			},
+			value:    "y",
+			expected: int8(1),
+		},
+		{
+			typ: &btf.Int{
+				Size:     1,
+				Encoding: btf.Bool,
+			},
+			value:   "foo",
+			comment: "Bad value",
+		},
+		{
+			typ:     &btf.Int{},
+			comment: "Encoding is not Bool",
+		},
+		{
+			typ: &btf.Int{
+				Encoding: btf.Bool,
+			},
+			comment: "Size is not 1",
+		},
+		{
+			typ: &btf.Enum{
+				Name: "libbpf_tristate",
+			},
+			value:    "y",
+			expected: int64(TriYes),
+		},
+		{
+			typ: &btf.Enum{
+				Name: "libbpf_tristate",
+			},
+			value:    "n",
+			expected: int64(TriNo),
+		},
+		{
+			typ: &btf.Enum{
+				Name: "libbpf_tristate",
+			},
+			value:    "m",
+			expected: int64(TriModule),
+		},
+		{
+			typ: &btf.Enum{
+				Name: "libbpf_tristate",
+			},
+			value:   "foo",
+			comment: "Bad value",
+		},
+		{
+			typ: &btf.Enum{
+				Name: "error",
+			},
+			comment: "Enum name is wrong",
+		},
+		{
+			typ:     &btf.Array{},
+			value:   "y",
+			comment: "Type is not btf.Int",
+		},
+		{
+			typ: &btf.Int{
+				Size: 1,
+			},
+			value:    "255",
+			expected: uint8(255),
+		},
+		{
+			typ: &btf.Int{
+				Size: 2,
+			},
+			value:    "0xcafe",
+			expected: uint16(0xcafe),
+		},
+		{
+			typ: &btf.Int{
+				Size: 2,
+			},
+			value:    "0755",
+			expected: uint16(0755),
+		},
+		{
+			typ: &btf.Int{
+				Size:     4,
+				Encoding: btf.Signed,
+			},
+			value:    "-2147483648",
+			expected: int32(-2147483648),
+		},
+		{
+			typ: &btf.Int{
+				Size:     4,
+				Encoding: btf.Signed,
+			},
+			value:    "+2147483647",
+			expected: int32(+2147483647),
+		},
+		{
+			typ: &btf.Int{
+				Size: 4,
+			},
+			value:    "0xcafec0de",
+			expected: uint32(0xcafec0de),
+		},
+		{
+			typ: &btf.Int{
+				Size:     8,
+				Encoding: btf.Signed,
+			},
+			value:    "+1000000000000",
+			expected: int64(1000000000000),
+		},
+		{
+			typ: &btf.Int{
+				Size: 8,
+			},
+			value:    "1000000000000",
+			expected: uint64(1000000000000),
+		},
+		{
+			typ: &btf.Int{
+				Size: 1,
+			},
+			value:   "foo",
+			comment: "Value is not an int",
+		},
+		{
+			typ:     &btf.Array{},
+			value:   "1",
+			comment: "Type is not btf.Int",
+		},
+		{
+			typ: &btf.Int{
+				Size: 16,
+			},
+			value:   "1",
+			comment: "Size is wrong",
+		},
+		{
+			typ: &btf.Typedef{
+				Type: &btf.Int{
+					Size: 1,
+				},
+			},
+			value:    "1",
+			expected: uint8(1),
+		},
+		{
+			typ: &btf.Array{
+				Type: &btf.Int{
+					Size:     1,
+					Encoding: btf.Char,
+				},
+				Nelems: 6,
+			},
+			value:    `"foobar"`,
+			expected: []byte("foobar"),
+		},
+		{
+			typ: &btf.Array{
+				Type: &btf.Int{
+					Size:     1,
+					Encoding: btf.Unsigned,
+				},
+				Nelems: 3,
+			},
+			value:    `"foobar"`,
+			expected: []byte("foo"),
+		},
+		{
+			typ: &btf.Array{
+				Type: &btf.Int{
+					Size:     1,
+					Encoding: btf.Signed,
+				},
+				Nelems: 2,
+			},
+			value:    `"42"`,
+			expected: []byte("42"),
+		},
+		{
+			typ:     &btf.Int{},
+			value:   `"foo"`,
+			comment: "Type is not btf.Array",
+		},
+		{
+			typ:     &btf.Array{},
+			value:   `"foo"`,
+			comment: "Type is not btf.Array of btf.Int",
+		},
+		{
+			typ: &btf.Array{
+				Type: &btf.Int{
+					Size:     1,
+					Encoding: btf.Bool,
+				},
+			},
+			comment: "Type is not btf.Array of btf.Int of size 1 which is not btf.Bool",
+		},
+		{
+			typ: &btf.Array{
+				Type: &btf.Int{
+					Size:     4,
+					Encoding: btf.Char,
+				},
+			},
+			value:   `"foo"`,
+			comment: "Type is not btf.Array of btf.Char of size 1",
+		},
+		{
+			typ: &btf.Array{
+				Type: &btf.Int{
+					Size:     1,
+					Encoding: btf.Char,
+				},
+			},
+			value:   `"foo`,
+			comment: `Value does not start and end with '"'`,
+		},
+	}
+
+	for _, c := range cases {
+		// Failure cases: only the presence of an error is checked, so an
+		// empty destination buffer is passed.
+		if len(c.comment) > 0 {
+			err := PutValue(make([]byte, 0), c.typ, c.value)
+
+			qt.Assert(t, err, qt.IsNotNil, qt.Commentf(c.comment))
+
+			continue
+		}
+
+		// Encode the expected value in native byte order to obtain the
+		// reference bytes.
+		var buf bytes.Buffer
+		err := binary.Write(&buf, internal.NativeEndian, c.expected)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		// The destination buffer is exactly as large as the reference.
+		expected := buf.Bytes()
+		data := make([]byte, len(expected))
+		err = PutValue(data, c.typ, c.value)
+
+		qt.Assert(t, err, qt.IsNil)
+
+		qt.Assert(t, data, qt.DeepEquals, expected)
+	}
+}
@@ -0,0 +1,11 @@
+CONFIG_TRISTATE=m
+# CONFIG_IS_NOT_SET is not set
+CONFIG_BOOL=y
+CONFIG_CHAR=100
+
+CONFIG_USHORT=30000
+CONFIG_INT=123456
+CONFIG_ULONG=0xDEADBEEFC0DE
+CONFIG_STR="abracad"
+CONFIG_FOO="foo"
+CONFIG_FOO="bar"
@@ -0,0 +1,26 @@
+package internal
+
+import (
+	"sync"
+)
+
+// memoizedFunc caches the outcome of a single invocation of fn.
+type memoizedFunc[T any] struct {
+	once   sync.Once
+	fn     func() (T, error)
+	result T
+	err    error
+}
+
+// run invokes fn and records both of its return values.
+func (mf *memoizedFunc[T]) run() {
+	mf.result, mf.err = mf.fn()
+}
+
+// do returns the recorded outcome of fn, invoking fn on the first call only.
+func (mf *memoizedFunc[T]) do() (T, error) {
+	mf.once.Do(mf.run)
+	return mf.result, mf.err
+}
+
+// Memoize the result of a function call.
+//
+// The returned function invokes fn on its first call and hands out the same
+// value and error on every call. fn is never invoked a second time, even if
+// it returned an error.
+func Memoize[T any](fn func() (T, error)) func() (T, error) {
+	mf := &memoizedFunc[T]{fn: fn}
+	return mf.do
+}
@@ -0,0 +1,97 @@
+package internal
+
+import (
+	"bytes"
+	"errors"
+	"go/format"
+	"go/scanner"
+	"io"
+	"reflect"
+	"strings"
+	"unicode"
+)
+
+// Identifier turns a C style type or field name into an exportable Go equivalent.
+//
+// Runes which are neither letters, digits nor underscores are dropped. A
+// letter at the start of the string, or following a dropped underscore, is
+// upper cased. An underscore is dropped at the start of the string, after
+// another dropped underscore, and after a digit or lower case letter.
+func Identifier(str string) string {
+	// last is the rune most recently emitted, or -1 if there is none yet or
+	// an underscore was just dropped.
+	last := rune(-1)
+
+	convert := func(r rune) rune {
+		// See https://golang.org/ref/spec#Identifiers
+		if unicode.IsLetter(r) {
+			if last == -1 {
+				r = unicode.ToUpper(r)
+			}
+		} else if r == '_' {
+			afterLower := unicode.IsLetter(last) && unicode.IsLower(last)
+			if last == -1 || unicode.IsDigit(last) || afterLower {
+				// Drop the underscore and make sure that the next
+				// letter is upper cased.
+				r = -1
+			}
+		} else if !unicode.IsDigit(r) {
+			// Drop the rune without touching last.
+			return -1
+		}
+
+		last = r
+		return r
+	}
+
+	return strings.Map(convert, str)
+}
+
+// WriteFormatted outputs a formatted src into out.
+//
+// src is run through format.Source. On success the result is written to out
+// and the error of that write is returned.
+//
+// If formatting fails with a scanner.ErrorList, a new list is returned in
+// which each error message is extended with the remainder of the source line
+// starting at the position of the error. Errors without a valid position,
+// with a position outside of src, or on a final line without a trailing
+// newline are passed through as is. Any other error is returned unchanged.
+func WriteFormatted(src []byte, out io.Writer) error {
+	formatted, err := format.Source(src)
+	if err == nil {
+		_, err = out.Write(formatted)
+		return err
+	}
+
+	var el scanner.ErrorList
+	if !errors.As(err, &el) {
+		return err
+	}
+
+	var nel scanner.ErrorList
+	for _, err := range el {
+		// Offsets are not guaranteed to lie within src, for example when
+		// format.Source had to treat src as a fragment. Slicing with such
+		// an offset would panic, so keep the error without context.
+		offset := err.Pos.Offset
+		if !err.Pos.IsValid() || offset < 0 || offset > len(src) {
+			nel = append(nel, err)
+			continue
+		}
+
+		buf := src[offset:]
+		nl := bytes.IndexByte(buf, '\n')
+		if nl == -1 {
+			nel = append(nel, err)
+			continue
+		}
+
+		// el holds pointers, so this extends the message of the original
+		// error value.
+		err.Msg += ": " + string(buf[:nl])
+		nel = append(nel, err)
+	}
+
+	return nel
+}
+
+// GoTypeName is like %T, but elides the package name.
+//
+// Pointers to a type are peeled off. An empty string is returned for a nil
+// interface value, since there is no type to name.
+func GoTypeName(t any) string {
+	rT := reflect.TypeOf(t)
+	if rT == nil {
+		// reflect.TypeOf returns nil for a nil interface, calling Kind on
+		// it would panic.
+		return ""
+	}
+
+	for rT.Kind() == reflect.Pointer {
+		rT = rT.Elem()
+	}
+	// Doesn't return the correct Name for generic types due to https://github.com/golang/go/issues/55924
+	return rT.Name()
+}
@@ -0,0 +1,41 @@
+package internal
+
+import (
+	"testing"
+
+	qt "github.com/frankban/quicktest"
+)
+
+func TestIdentifier(t *testing.T) {
+	testcases := []struct {
+		in, out string
+	}{
+		{".rodata", "Rodata"},
+		{"_foo_bar_", "FooBar"},
+		{"ipv6_test", "Ipv6Test"},
+		{"FOO_BAR", "FOO_BAR"},
+		{"FOO_", "FOO_"},
+		{"FOO__BAR", "FOO__BAR"},
+		{"FOO___BAR", "FOO___BAR"},
+		{"_FOO__BAR", "FOO__BAR"},
+		{"__FOO__BAR", "FOO__BAR"},
+	}
+
+	for _, tc := range testcases {
+		have := Identifier(tc.in)
+		if have != tc.out {
+			t.Errorf("Expected %q as output of %q, got %q", tc.out, tc.in, have)
+		}
+	}
+}
+
+// TestGoTypeName checks GoTypeName for values, pointers and a generic type.
+func TestGoTypeName(t *testing.T) {
+	type foo struct{}
+	type bar[T any] struct{}
+
+	for _, tc := range []struct {
+		value any
+		want  string
+	}{
+		{foo{}, "foo"},
+		{new(foo), "foo"},
+		{new(*foo), "foo"},
+		{bar[int]{}, "bar[int]"},
+		// Broken in the stdlib, see GoTypeName for details.
+		// {bar[qt.C]{}, "bar[quicktest.C]"},
+	} {
+		qt.Assert(t, GoTypeName(tc.value), qt.Equals, tc.want)
+	}
+}
@@ -0,0 +1,65 @@
+package internal
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"runtime"
+
+	"github.com/cilium/ebpf/internal/sys"
+	"github.com/cilium/ebpf/internal/unix"
+)
+
+// Pin makes the object referred to by fd available at newPath.
+//
+// currentPath is the path the object is currently pinned at, or an empty
+// string if it isn't pinned. If it is empty the object is pinned at newPath,
+// otherwise the existing pin is moved without replacing a file which already
+// exists at newPath. Nothing happens if both paths are equal.
+//
+// Returns an error if newPath is empty or if its directory is not on a bpf
+// filesystem.
+func Pin(currentPath, newPath string, fd *sys.FD) error {
+	switch {
+	case newPath == "":
+		return errors.New("given pinning path cannot be empty")
+	case currentPath == newPath:
+		return nil
+	}
+
+	fsType, err := FSType(filepath.Dir(newPath))
+	switch {
+	case err != nil:
+		return err
+	case fsType != unix.BPF_FS_MAGIC:
+		return fmt.Errorf("%s is not on a bpf filesystem", newPath)
+	}
+
+	defer runtime.KeepAlive(fd)
+
+	// pinNew creates a fresh pin of fd at newPath.
+	pinNew := func() error {
+		attr := sys.ObjPinAttr{
+			Pathname: sys.NewStringPointer(newPath),
+			BpfFd:    fd.Uint(),
+		}
+		return sys.ObjPin(&attr)
+	}
+
+	if currentPath == "" {
+		return pinNew()
+	}
+
+	// Renameat2 is used instead of os.Rename to disallow the new path replacing
+	// an existing path.
+	err = unix.Renameat2(unix.AT_FDCWD, currentPath, unix.AT_FDCWD, newPath, unix.RENAME_NOREPLACE)
+	switch {
+	case err == nil:
+		// Object is now moved to the new pinning path.
+		return nil
+	case os.IsNotExist(err):
+		// Internal state not in sync with the file system so let's fix it.
+		return pinNew()
+	default:
+		return fmt.Errorf("unable to move pinned object to new path %v: %w", newPath, err)
+	}
+}
+
+// Unpin removes the file at pinnedPath.
+//
+// An empty path and a path which doesn't exist are not treated as errors.
+func Unpin(pinnedPath string) error {
+	if pinnedPath == "" {
+		return nil
+	}
+
+	if err := os.Remove(pinnedPath); err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	return nil
+}
@@ -0,0 +1,43 @@
+package internal
+
+import (
+	"runtime"
+)
+
+// PlatformPrefix returns the platform-dependent syscall wrapper prefix used by
+// the linux kernel.
+//
+// The prefix is chosen based on runtime.GOARCH. An empty string is returned
+// for architectures without a known prefix.
+//
+// Based on https://github.com/golang/go/blob/master/src/go/build/syslist.go
+// and https://github.com/libbpf/libbpf/blob/master/src/libbpf.c#L10047
+func PlatformPrefix() string {
+	prefixes := []struct {
+		prefix string
+		arches []string
+	}{
+		{"__ia32_", []string{"386"}},
+		{"__x64_", []string{"amd64", "amd64p32"}},
+		{"__arm_", []string{"arm", "armbe"}},
+		{"__arm64_", []string{"arm64", "arm64be"}},
+		{"__mips_", []string{"mips", "mipsle", "mips64", "mips64le", "mips64p32", "mips64p32le"}},
+		{"__s390_", []string{"s390"}},
+		{"__s390x_", []string{"s390x"}},
+		{"__riscv_", []string{"riscv", "riscv64"}},
+		{"__powerpc_", []string{"ppc"}},
+		{"__powerpc64_", []string{"ppc64", "ppc64le"}},
+	}
+
+	for _, entry := range prefixes {
+		for _, arch := range entry.arches {
+			if arch == runtime.GOARCH {
+				return entry.prefix
+			}
+		}
+	}
+
+	return ""
+}
@@ -0,0 +1,11 @@
+package internal
+
+// EmptyBPFContext is the smallest-possible BPF input context to be used for
+// invoking `Program.{Run,Benchmark,Test}`.
+//
+// Programs require a context input buffer of at least 15 bytes. Looking in
+// net/bpf/test_run.c, bpf_test_init() requires that the input is at least
+// ETH_HLEN (14) bytes. As of Linux commit fd18942 ("bpf: Don't redirect packets
+// with invalid pkt_len"), it also requires the skb to be non-empty after
+// removing the Layer 2 header.
+var EmptyBPFContext = make([]byte, 15)
@@ -0,0 +1,23 @@
+package internal
+
+import (
+	"unsafe"
+
+	"github.com/cilium/ebpf/internal/unix"
+)
+
+// FSType returns the filesystem type reported by statfs for path.
+func FSType(path string) (int64, error) {
+	var st unix.Statfs_t
+	err := unix.Statfs(path, &st)
+	if err != nil {
+		return 0, err
+	}
+
+	if unsafe.Sizeof(st.Type) != 4 {
+		return int64(st.Type), nil
+	}
+
+	// We're on a 32 bit arch, where statfs.Type is int32. bpfFSType is a
+	// negative number when interpreted as int32 so we need to cast via
+	// uint32 to avoid sign extension.
+	return int64(uint32(st.Type)), nil
+}
@@ -0,0 +1,23 @@
+package internal
+
+import (
+	"testing"
+
+	"github.com/cilium/ebpf/internal/unix"
+
+	qt "github.com/frankban/quicktest"
+)
+
+func TestFSType(t *testing.T) {
+	for _, fs := range []struct {
+		path  string
+		magic int64
+	}{
+		{"/sys/kernel/tracing", unix.TRACEFS_MAGIC},
+		{"/sys/fs/bpf", unix.BPF_FS_MAGIC},
+	} {
+		fst, err := FSType(fs.path)
+		qt.Assert(t, err, qt.IsNil)
+		qt.Assert(t, fst, qt.Equals, fs.magic)
+	}
+}
@@ -0,0 +1,6 @@
+// Package sys contains bindings for the BPF syscall.
+package sys
+
+// Regenerate types.go by invoking go generate in the current directory.
+
+//go:generate go run github.com/cilium/ebpf/internal/cmd/gentypes ../../btf/testdata/vmlinux.btf.gz
@@ -0,0 +1,133 @@
+package sys
+
+import (
+	"fmt"
+	"math"
+	"os"
+	"runtime"
+	"strconv"
+
+	"github.com/cilium/ebpf/internal/unix"
+)
+
+// ErrClosedFd is returned when an operation is attempted on an FD which has
+// already been closed. It is the same value as unix.EBADF.
+var ErrClosedFd = unix.EBADF
+
+// FD wraps a raw file descriptor.
+type FD struct {
+	// raw is the file descriptor number. A negative value means that the FD
+	// has been closed or that ownership of the descriptor was given up.
+	raw int
+}
+
+// newFD wraps value in an FD and installs a finalizer on it.
+//
+// While leak tracing is enabled the stack of the caller is recorded for
+// value. newFD panics if a stack is already recorded for the same value.
+func newFD(value int) *FD {
+	if onLeakFD != nil {
+		// Attempt to store the caller's stack for the given fd value.
+		// Panic if fds contains an existing stack for the fd.
+		if prev, loaded := fds.LoadOrStore(value, callersFrames()); loaded {
+			stack := FormatFrames(prev.(*runtime.Frames))
+			panic(fmt.Sprintf("found existing stack for fd %d:\n%s", value, stack))
+		}
+	}
+
+	fd := &FD{raw: value}
+	runtime.SetFinalizer(fd, (*FD).finalize)
+	return fd
+}
+
+// finalize is set as the FD's runtime finalizer and
+// sends a leak trace before calling FD.Close().
+//
+// It does nothing for an FD which was already closed.
+func (fd *FD) finalize() {
+	if fd.raw < 0 {
+		return
+	}
+
+	// Invoke the fd leak callback. Calls LoadAndDelete to guarantee the callback
+	// is invoked at most once for one sys.FD allocation, runtime.Frames can only
+	// be unwound once.
+	if frames, found := fds.LoadAndDelete(fd.Int()); found && onLeakFD != nil {
+		onLeakFD(frames.(*runtime.Frames))
+	}
+
+	// There is nobody to report a close error to from a finalizer.
+	_ = fd.Close()
+}
+
+// NewFD wraps a raw fd with a finalizer.
+//
+// You must not use the raw fd after calling this function, since the underlying
+// file descriptor number may change. This is because the BPF UAPI assumes that
+// zero is not a valid fd value.
+//
+// Negative values are rejected with an error.
+func NewFD(value int) (*FD, error) {
+	if value < 0 {
+		return nil, fmt.Errorf("invalid fd %d", value)
+	}
+
+	fd := newFD(value)
+	if value == 0 {
+		// Replace fd zero by a duplicate with a different number and
+		// release the original.
+		dup, err := fd.Dup()
+		_ = fd.Close()
+		return dup, err
+	}
+
+	return fd, nil
+}
+
+// String returns the raw file descriptor as a decimal number.
+func (fd *FD) String() string {
+	return strconv.Itoa(fd.raw)
+}
+
+// Int returns the raw file descriptor, which is negative once the FD has
+// been closed.
+func (fd *FD) Int() int {
+	return fd.raw
+}
+
+// Uint returns the raw file descriptor as a uint32.
+//
+// math.MaxUint32 is returned if the descriptor is negative or doesn't fit.
+func (fd *FD) Uint() uint32 {
+	if fd.raw >= 0 && int64(fd.raw) <= math.MaxUint32 {
+		return uint32(fd.raw)
+	}
+
+	// Best effort: this is the number most likely to be an invalid file
+	// descriptor. It is equal to -1 (on two's complement arches).
+	return math.MaxUint32
+}
+
+// Close closes the underlying file descriptor.
+//
+// Closing an FD which is already closed is not an error.
+func (fd *FD) Close() error {
+	if fd.raw >= 0 {
+		return unix.Close(fd.disown())
+	}
+	return nil
+}
+
+// disown gives up ownership of the raw file descriptor and returns it.
+//
+// The FD is marked as closed, its recorded stack is dropped from fds and its
+// finalizer is cleared.
+func (fd *FD) disown() int {
+	raw := fd.raw
+	fd.raw = -1
+
+	fds.Delete(raw)
+	runtime.SetFinalizer(fd, nil)
+	return raw
+}
+
+// Dup duplicates the file descriptor and wraps the copy in a new FD.
+//
+// The duplicate is created with F_DUPFD_CLOEXEC and is never fd zero.
+// ErrClosedFd is returned if fd has already been closed. An error from the
+// underlying fcntl is wrapped, so that it can be inspected using errors.Is
+// and errors.As.
+func (fd *FD) Dup() (*FD, error) {
+	if fd.raw < 0 {
+		return nil, ErrClosedFd
+	}
+
+	// Always require the fd to be larger than zero: the BPF API treats the value
+	// as "no argument provided".
+	dup, err := unix.FcntlInt(uintptr(fd.raw), unix.F_DUPFD_CLOEXEC, 1)
+	if err != nil {
+		return nil, fmt.Errorf("can't dup fd: %w", err)
+	}
+
+	return newFD(dup), nil
+}
+
+// File takes ownership of FD and turns it into an [*os.File].
+//
+// You must not use the FD after the call returns.
+//
+// Returns nil if the FD is not valid.
+func (fd *FD) File(name string) *os.File {
+	if fd.raw >= 0 {
+		return os.NewFile(uintptr(fd.disown()), name)
+	}
+	return nil
+}
@@ -0,0 +1,66 @@
+package sys
+
+import (
+	"os"
+	"syscall"
+	"testing"
+
+	"github.com/cilium/ebpf/internal/unix"
+	qt "github.com/frankban/quicktest"
+)
+
+// init moves stdin away from fd 0 and reserves fd 0 so that TestFD can wrap
+// it.
+func init() {
+	// Free up fd 0 for TestFD.
+	dupFd, err := unix.FcntlInt(os.Stdin.Fd(), unix.F_DUPFD_CLOEXEC, 1)
+	if err != nil {
+		panic(err)
+	}
+
+	orig := os.Stdin
+	os.Stdin = os.NewFile(uintptr(dupFd), "stdin")
+	orig.Close()
+
+	reserveFdZero()
+}
+
+// reserveFdZero opens os.DevNull and expects the resulting descriptor to be
+// fd 0.
+//
+// It panics if opening fails or if a descriptor other than zero is returned.
+func reserveFdZero() {
+	fd, err := unix.Open(os.DevNull, syscall.O_RDONLY, 0)
+	if err != nil {
+		panic(err)
+	}
+	if fd != 0 {
+		// err is nil here, so panic with an explicit message instead.
+		panic("reserveFdZero: expected to obtain fd 0, got a different descriptor")
+	}
+}
+
+// TestFD checks that NewFD rejects negative values and never hands out an FD
+// wrapping fd zero.
+func TestFD(t *testing.T) {
+	_, err := NewFD(-1)
+	qt.Assert(t, err, qt.IsNotNil, qt.Commentf("negative fd should be rejected"))
+
+	wrapped, err := NewFD(0)
+	qt.Assert(t, err, qt.IsNil)
+	qt.Assert(t, wrapped.Int(), qt.Not(qt.Equals), 0, qt.Commentf("fd value should not be zero"))
+
+	var st unix.Stat_t
+	qt.Assert(t, unix.Fstat(0, &st), qt.ErrorIs, unix.EBADF, qt.Commentf("zero fd should be closed"))
+
+	// Occupy fd 0 again.
+	reserveFdZero()
+}
+
+// TestFDFile checks that File hands the descriptor over to an *os.File and
+// leaves the FD closed.
+func TestFDFile(t *testing.T) {
+	fd := newFD(openFd(t))
+
+	f := fd.File("test")
+	qt.Assert(t, f, qt.IsNotNil)
+	qt.Assert(t, f.Close(), qt.IsNil)
+
+	// The FD doesn't own a descriptor anymore.
+	qt.Assert(t, fd.File("closed"), qt.IsNil)
+	_, err := fd.Dup()
+	qt.Assert(t, err, qt.ErrorIs, ErrClosedFd)
+}
+
+// openFd opens os.DevNull read-only and returns the raw file descriptor.
+func openFd(tb testing.TB) int {
+	raw, err := unix.Open(os.DevNull, syscall.O_RDONLY, 0)
+	qt.Assert(tb, err, qt.IsNil)
+	return raw
+}
@@ -0,0 +1,93 @@
+package sys
+
+import (
+	"bytes"
+	"fmt"
+	"runtime"
+	"sync"
+)
+
+// OnLeakFD controls tracing [FD] lifetime to detect resources that are not
+// closed by Close().
+//
+// If fn is not nil, tracing is enabled for all FDs created going forward. fn is
+// invoked for all FDs that are closed by the garbage collector instead of an
+// explicit Close() by a caller. Calling OnLeakFD twice with a non-nil fn
+// (without disabling tracing in the meantime) will cause a panic.
+//
+// If fn is nil, tracing will be disabled. Any FDs that have not been closed are
+// considered to be leaked and the previously installed fn is invoked for each
+// of them. Disabling tracing which is not enabled does nothing.
+//
+// fn will be invoked at most once for every unique sys.FD allocation since a
+// runtime.Frames can only be unwound once.
+func OnLeakFD(fn func(*runtime.Frames)) {
+	if fn == nil {
+		// Disable tracing, if it is enabled at all.
+		if onLeakFD == nil {
+			return
+		}
+
+		// Call onLeakFD for all open fds.
+		for _, frames := range flushFrames() {
+			onLeakFD(frames)
+		}
+
+		onLeakFD = nil
+		return
+	}
+
+	// Enable leak tracing since a new fn is provided.
+	if onLeakFD != nil {
+		panic("OnLeakFD called twice with non-nil fn")
+	}
+
+	onLeakFD = fn
+}
+
+// onLeakFD is the leak callback installed via OnLeakFD. Tracing is enabled
+// while it is not nil.
+var onLeakFD func(*runtime.Frames)
+
+// fds is a registry of all file descriptors wrapped into sys.FDs that were
+// created while an fd tracer was active. It maps the raw fd to the stack
+// captured when the sys.FD was created.
+var fds sync.Map // map[int]*runtime.Frames
+
+// flushFrames removes all elements from fds and returns them as a slice. This
+// deals with the fact that a runtime.Frames can only be unwound once using
+// Next().
+func flushFrames() []*runtime.Frames {
+	var collected []*runtime.Frames
+
+	drain := func(fd, stack any) bool {
+		fds.Delete(fd)
+		collected = append(collected, stack.(*runtime.Frames))
+		// Keep iterating until every entry was visited.
+		return true
+	}
+	fds.Range(drain)
+
+	return collected
+}
+
+// callersFrames captures up to 32 return addresses of the calling goroutine's
+// stack, starting at the caller of callersFrames, and returns them as a
+// runtime.Frames.
+//
+// Returns nil if no program counters could be captured.
+func callersFrames() *runtime.Frames {
+	c := make([]uintptr, 32)
+
+	// Skip runtime.Callers and this function.
+	i := runtime.Callers(2, c)
+	if i == 0 {
+		return nil
+	}
+
+	// Only the first i entries were filled in by runtime.Callers, pass
+	// nothing but valid program counters on.
+	return runtime.CallersFrames(c[:i])
+}
+
+// FormatFrames formats a runtime.Frames as a human-readable string.
+//
+// Every frame is rendered as the function name plus the offset of the program
+// counter from the function entry, followed by file and line on a second
+// line. fs is consumed in the process and can't be formatted a second time.
+//
+// An empty string is returned if fs is nil.
+func FormatFrames(fs *runtime.Frames) string {
+	if fs == nil {
+		// callersFrames returns nil when no stack could be captured.
+		return ""
+	}
+
+	var b bytes.Buffer
+	for {
+		f, more := fs.Next()
+		fmt.Fprintf(&b, "\t%s+%#x\n\t\t%s:%d\n", f.Function, f.PC-f.Entry, f.File, f.Line)
+		if !more {
+			break
+		}
+	}
+	return b.String()
+}
@@ -0,0 +1,49 @@
+// Code generated by "stringer -type MapFlags"; DO NOT EDIT.
+
+package sys
+
+import "strconv"
+
+// _ exists only for its compile-time checks: each index expression below is
+// out of range for x unless the named constant still has the listed value.
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[BPF_F_NO_PREALLOC-1]
+	_ = x[BPF_F_NO_COMMON_LRU-2]
+	_ = x[BPF_F_NUMA_NODE-4]
+	_ = x[BPF_F_RDONLY-8]
+	_ = x[BPF_F_WRONLY-16]
+	_ = x[BPF_F_STACK_BUILD_ID-32]
+	_ = x[BPF_F_ZERO_SEED-64]
+	_ = x[BPF_F_RDONLY_PROG-128]
+	_ = x[BPF_F_WRONLY_PROG-256]
+	_ = x[BPF_F_CLONE-512]
+	_ = x[BPF_F_MMAPABLE-1024]
+	_ = x[BPF_F_PRESERVE_ELEMS-2048]
+	_ = x[BPF_F_INNER_MAP-4096]
+}
+
+const _MapFlags_name = "BPF_F_NO_PREALLOCBPF_F_NO_COMMON_LRUBPF_F_NUMA_NODEBPF_F_RDONLYBPF_F_WRONLYBPF_F_STACK_BUILD_IDBPF_F_ZERO_SEEDBPF_F_RDONLY_PROGBPF_F_WRONLY_PROGBPF_F_CLONEBPF_F_MMAPABLEBPF_F_PRESERVE_ELEMSBPF_F_INNER_MAP"
+
+var _MapFlags_map = map[MapFlags]string{
+	1:    _MapFlags_name[0:17],
+	2:    _MapFlags_name[17:36],
+	4:    _MapFlags_name[36:51],
+	8:    _MapFlags_name[51:63],
+	16:   _MapFlags_name[63:75],
+	32:   _MapFlags_name[75:95],
+	64:   _MapFlags_name[95:110],
+	128:  _MapFlags_name[110:127],
+	256:  _MapFlags_name[127:144],
+	512:  _MapFlags_name[144:155],
+	1024: _MapFlags_name[155:169],
+	2048: _MapFlags_name[169:189],
+	4096: _MapFlags_name[189:204],
+}
+
+// String returns the name of i if it is exactly one of the known flag values,
+// and "MapFlags(<decimal value>)" otherwise.
+func (i MapFlags) String() string {
+	if str, ok := _MapFlags_map[i]; ok {
+		return str
+	}
+	return "MapFlags(" + strconv.FormatInt(int64(i), 10) + ")"
+}
@@ -0,0 +1,52 @@
+package sys
+
+import (
+	"unsafe"
+
+	"github.com/cilium/ebpf/internal/unix"
+)
+
+// NewPointer creates a 64-bit pointer from an unsafe Pointer.
+func NewPointer(ptr unsafe.Pointer) Pointer {
+	return Pointer{ptr: ptr}
+}
+
+// NewSlicePointer creates a 64-bit pointer from a byte slice.
+//
+// The pointer refers to the first element of buf. The zero Pointer is
+// returned if buf is empty.
+func NewSlicePointer(buf []byte) Pointer {
+	if len(buf) > 0 {
+		return Pointer{ptr: unsafe.Pointer(&buf[0])}
+	}
+	return Pointer{}
+}
+
+// NewSlicePointerLen creates a 64-bit pointer from a byte slice and also
+// returns the length of the slice as a uint32.
+//
+// Useful to assign both the pointer and the length in one go.
+func NewSlicePointerLen(buf []byte) (Pointer, uint32) {
+	size := uint32(len(buf))
+	return NewSlicePointer(buf), size
+}
+
+// NewStringPointer creates a 64-bit pointer from a string.
+//
+// The zero Pointer is returned if unix.BytePtrFromString rejects str.
+func NewStringPointer(str string) Pointer {
+	if p, err := unix.BytePtrFromString(str); err == nil {
+		return Pointer{ptr: unsafe.Pointer(p)}
+	}
+	return Pointer{}
+}
+
+// NewStringSlicePointer allocates an array of Pointers to each string in the
+// given slice of strings and returns a 64-bit pointer to the start of the
+// resulting array.
+//
+// The zero Pointer is returned if strings is empty, since there is no first
+// element to point at.
+//
+// Use this function to pass arrays of strings as syscall arguments.
+func NewStringSlicePointer(strings []string) Pointer {
+	if len(strings) == 0 {
+		// Taking the address of sp[0] below would panic.
+		return Pointer{}
+	}
+
+	sp := make([]Pointer, 0, len(strings))
+	for _, s := range strings {
+		sp = append(sp, NewStringPointer(s))
+	}
+
+	return Pointer{ptr: unsafe.Pointer(&sp[0])}
+}
@@ -0,0 +1,14 @@
+//go:build armbe || mips || mips64p32
+
+package sys
+
+import (
+	"unsafe"
+)
+
+// Pointer wraps an unsafe.Pointer to be 64bit to
+// conform to the syscall specification.
+//
+// In this variant a uint32 of padding is placed in front of the pointer.
+type Pointer struct {
+	// pad is never assigned by the constructors in this package.
+	pad uint32
+	ptr unsafe.Pointer
+}
@@ -0,0 +1,14 @@
+//go:build 386 || amd64p32 || arm || mipsle || mips64p32le
+
+package sys
+
+import (
+	"unsafe"
+)
+
+// Pointer wraps an unsafe.Pointer to be 64bit to
+// conform to the syscall specification.
+//
+// In this variant a uint32 of padding is placed behind the pointer.
+type Pointer struct {
+	ptr unsafe.Pointer
+	// pad is never assigned by the constructors in this package.
+	pad uint32
+}
@@ -0,0 +1,13 @@
+//go:build !386 && !amd64p32 && !arm && !mipsle && !mips64p32le && !armbe && !mips && !mips64p32
+
+package sys
+
+import (
+	"unsafe"
+)
+
+// Pointer wraps an unsafe.Pointer to be 64bit to
+// conform to the syscall specification.
+//
+// This variant carries no padding.
+type Pointer struct {
+	ptr unsafe.Pointer
+}
@@ -0,0 +1,83 @@
+package sys
+
+import (
+	"fmt"
+	"runtime"
+	"unsafe"
+
+	"github.com/cilium/ebpf/internal/unix"
+)
+
+// A sigset containing only SIGPROF. It is populated by init below.
+var profSet unix.Sigset_t
+
+// init sets the bit for SIGPROF in profSet.
+func init() {
+	// See sigsetAdd for details on the implementation. Open coded here so
+	// that the compiler will check the constant calculations for us.
+	profSet.Val[sigprofBit/wordBits] |= 1 << (sigprofBit % wordBits)
+}
+
+// maskProfilerSignal locks the calling goroutine to its underlying OS thread
+// and adds SIGPROF to the thread's signal mask. This prevents pprof from
+// interrupting expensive syscalls like e.g. BPF_PROG_LOAD.
+//
+// The caller must defer unmaskProfilerSignal() to reverse the operation.
+//
+// If the mask can't be changed the thread is unlocked again and the function
+// panics.
+func maskProfilerSignal() {
+	runtime.LockOSThread()
+
+	err := unix.PthreadSigmask(unix.SIG_BLOCK, &profSet, nil)
+	if err == nil {
+		return
+	}
+
+	runtime.UnlockOSThread()
+	panic(fmt.Errorf("masking profiler signal: %w", err))
+}
+
+// unmaskProfilerSignal removes SIGPROF from the underlying thread's signal
+// mask, allowing it to be interrupted for profiling once again.
+//
+// It also unlocks the current goroutine from its underlying OS thread, even
+// if changing the mask fails and the function panics.
+func unmaskProfilerSignal() {
+	defer runtime.UnlockOSThread()
+
+	err := unix.PthreadSigmask(unix.SIG_UNBLOCK, &profSet, nil)
+	if err != nil {
+		panic(fmt.Errorf("unmasking profiler signal: %w", err))
+	}
+}
+
+const (
+	// Signal is the nth bit in the bitfield.
+	sigprofBit = int(unix.SIGPROF - 1)
+	// The number of bits in one Sigset_t word.
+	wordBits = int(unsafe.Sizeof(unix.Sigset_t{}.Val[0])) * 8
+)
+
+// sigsetAdd adds signal to set.
+//
+// An error is returned if signal is smaller than 1 or if its bit lies beyond
+// the last word of set.
+//
+// Note: Sigset_t.Val's value type is uint32 or uint64 depending on the arch.
+// This function must be able to deal with both and so must avoid any direct
+// references to u32 or u64 types.
+func sigsetAdd(set *unix.Sigset_t, signal unix.Signal) error {
+	if signal < 1 {
+		return fmt.Errorf("signal %d must be larger than 0", signal)
+	}
+
+	// For amd64, runtime.sigaddset() performs the following operation:
+	// set[(signal-1)/32] |= 1 << ((uint32(signal) - 1) & 31)
+	//
+	// This trick depends on sigset being two u32's, causing a signal in the
+	// bottom 31 bits to be written to the low word if bit 32 is low, or the high
+	// word if bit 32 is high.
+
+	// Signal n is stored in bit n-1 of the bitfield. Split that index into
+	// the word holding the bit and the position within that word.
+	index := int(signal - 1)
+	word, shift := index/wordBits, index%wordBits
+
+	if word >= len(set.Val) {
+		return fmt.Errorf("signal %d does not fit within unix.Sigset_t", signal)
+	}
+
+	set.Val[word] |= 1 << shift
+
+	return nil
+}
@@ -0,0 +1,78 @@
+package sys
+
+import (
+	"runtime"
+	"testing"
+	"unsafe"
+
+	"github.com/cilium/ebpf/internal/unix"
+
+	qt "github.com/frankban/quicktest"
+)
+
+// TestSigset checks that sigsetAdd sets the first and the last bit of a
+// sigset correctly and rejects signals outside of the valid range.
+func TestSigset(t *testing.T) {
+	const maxSignal = unix.Signal(unsafe.Sizeof(unix.Sigset_t{}) * 8)
+
+	// Type-infer a sigset word. This is a typed uint of 32 or 64 bits depending
+	// on the target architecture, so we can't use an untyped uint.
+	zero := unix.Sigset_t{}.Val[0]
+	// A word with only its most significant bit set.
+	highBit := ^(^zero >> 1)
+	lastWord := len(unix.Sigset_t{}.Val) - 1
+
+	var want, got unix.Sigset_t
+
+	// Flip the first bit of the first word.
+	err := sigsetAdd(&got, 1)
+	if err != nil {
+		t.Fatal(err)
+	}
+	want.Val[0] = 1
+	if want != got {
+		t.Fatalf("expected first word to be 0x%x, got: 0x%x", want, got)
+	}
+
+	// And the last bit of the last word.
+	err = sigsetAdd(&got, maxSignal)
+	if err != nil {
+		t.Fatal(err)
+	}
+	want.Val[lastWord] = highBit
+	if want != got {
+		t.Fatalf("expected last word to be 0x%x, got: 0x%x", want, got)
+	}
+
+	if sigsetAdd(&got, maxSignal+1) == nil {
+		t.Fatal("expected out-of-bounds add to be rejected")
+	}
+	if sigsetAdd(&got, -1) == nil {
+		t.Fatal("expected negative signal to be rejected")
+	}
+}
+
+// TestProfilerSignal checks that maskProfilerSignal blocks SIGPROF on the
+// current thread and that unmaskProfilerSignal restores the previous mask.
+func TestProfilerSignal(t *testing.T) {
+	// Additional goroutine lock to make the PthreadSigmask below execute on the
+	// same OS thread as the functions under test. UnlockOSThread needs to be
+	// called as many times as LockOSThread to unlock the goroutine.
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	// currentMask reads the signal mask of the thread without changing it.
+	currentMask := func() unix.Sigset_t {
+		var set unix.Sigset_t
+		if err := unix.PthreadSigmask(0, nil, &set); err != nil {
+			t.Fatal("get sigmask:", err)
+		}
+		return set
+	}
+
+	old := currentMask()
+
+	maskProfilerSignal()
+
+	// Adding SIGPROF to the active mask must not change it.
+	have := currentMask()
+	want := have
+	qt.Assert(t, sigsetAdd(&want, unix.SIGPROF), qt.IsNil)
+	qt.Assert(t, have, qt.Equals, want)
+
+	unmaskProfilerSignal()
+
+	have = currentMask()
+	qt.Assert(t, have, qt.Equals, old)
+}
@@ -0,0 +1,178 @@
+package sys
+
+import (
+	"runtime"
+	"syscall"
+	"unsafe"
+
+	"github.com/cilium/ebpf/internal/unix"
+)
+
+// ENOTSUPP is a Linux internal error code that has leaked into UAPI.
+//
+// It is not the same as ENOTSUP or EOPNOTSUPP. The numeric value 524 is
+// spelled out here as a plain syscall.Errno.
+var ENOTSUPP = syscall.Errno(524)
+
+// BPF wraps SYS_BPF.
+//
+// Any pointers contained in attr must use the Pointer type from this package.
+//
+// The first return value is the raw result of the syscall. A non-zero errno
+// is returned wrapped in wrappedErrno, a zero errno yields a nil error.
+func BPF(cmd Cmd, attr unsafe.Pointer, size uintptr) (uintptr, error) {
+	// Keep the Go profiler from repeatedly interrupting the verifier, since
+	// the resulting EAGAINs could otherwise turn into a livelock.
+	switch cmd {
+	case BPF_PROG_LOAD, BPF_PROG_RUN:
+		maskProfilerSignal()
+		defer unmaskProfilerSignal()
+	}
+
+	// Only program loads are retried on EAGAIN.
+	retryOnEAGAIN := cmd == BPF_PROG_LOAD
+
+	for {
+		ret, _, errno := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size)
+		runtime.KeepAlive(attr)
+
+		switch {
+		case errno == 0:
+			return ret, nil
+
+		case errno == unix.EAGAIN && retryOnEAGAIN:
+			// As of ~4.20 the verifier can be interrupted by a signal,
+			// and returns EAGAIN in that case.
+			continue
+
+		default:
+			return ret, wrappedErrno{errno}
+		}
+	}
+}
+
+// Info is implemented by all structs that can be passed to the ObjInfo syscall.
+//
+//	MapInfo
+//	ProgInfo
+//	LinkInfo
+//	BtfInfo
+//
+// The single method is unexported, so the interface can only be satisfied by
+// types declared in this package.
+type Info interface {
+	// info returns a pointer and a size which ObjInfo hands to the kernel.
+	info() (unsafe.Pointer, uint32)
+}
+
+var _ Info = (*MapInfo)(nil)
+
+// info returns the address of the MapInfo and its size in bytes.
+func (i *MapInfo) info() (unsafe.Pointer, uint32) {
+	size := unsafe.Sizeof(*i)
+	return unsafe.Pointer(i), uint32(size)
+}
+
+var _ Info = (*ProgInfo)(nil)
+
+// info returns the address of the ProgInfo and its size in bytes.
+func (i *ProgInfo) info() (unsafe.Pointer, uint32) {
+	size := unsafe.Sizeof(*i)
+	return unsafe.Pointer(i), uint32(size)
+}
+
+var _ Info = (*LinkInfo)(nil)
+
+// info returns the address of the LinkInfo and its size in bytes.
+func (i *LinkInfo) info() (unsafe.Pointer, uint32) {
+	size := unsafe.Sizeof(*i)
+	return unsafe.Pointer(i), uint32(size)
+}
+
+var _ Info = (*BtfInfo)(nil)
+
+// info returns the address of the BtfInfo and its size in bytes.
+func (i *BtfInfo) info() (unsafe.Pointer, uint32) {
+	size := unsafe.Sizeof(*i)
+	return unsafe.Pointer(i), uint32(size)
+}
+
+// ObjInfo retrieves information about a BPF Fd.
+//
+// info may be one of MapInfo, ProgInfo, LinkInfo and BtfInfo. Its address and
+// size are passed on to ObjGetInfoByFd, whose error is returned as is.
+func ObjInfo(fd *FD, info Info) error {
+	ptr, size := info.info()
+
+	attr := ObjGetInfoByFdAttr{
+		BpfFd:   fd.Uint(),
+		InfoLen: size,
+		Info:    NewPointer(ptr),
+	}
+	err := ObjGetInfoByFd(&attr)
+
+	// Only the numeric value of fd went into attr, so keep fd itself
+	// reachable until the call above has returned.
+	runtime.KeepAlive(fd)
+	return err
+}
+
+// ObjName is a null-terminated string made up of
+// 'A-Za-z0-9_' characters.
+//
+// It is a fixed size array of unix.BPF_OBJ_NAME_LEN bytes.
+type ObjName [unix.BPF_OBJ_NAME_LEN]byte
+
+// NewObjName copies name into an ObjName.
+//
+// The final byte of the array is never written and therefore stays zero, so
+// the result is always null-terminated. A name which doesn't fit into the
+// remaining bytes is truncated.
+func NewObjName(name string) ObjName {
+	var out ObjName
+	copy(out[:len(out)-1], name)
+	return out
+}
+
+// LogLevel controls the verbosity of the kernel's eBPF program verifier.
+//
+// The constants below are single bits: BPF_LOG_LEVEL1 is 1, BPF_LOG_LEVEL2 is
+// 2 and BPF_LOG_STATS is 4.
+type LogLevel uint32
+
+const (
+	BPF_LOG_LEVEL1 LogLevel = 1 << iota
+	BPF_LOG_LEVEL2
+	BPF_LOG_STATS
+)
+
+// LinkID uniquely identifies a bpf_link.
+//
+// It is a distinct type so that it can't be mixed up with the other 32 bit
+// identifiers declared alongside it.
+type LinkID uint32
+
+// BTFID uniquely identifies a BTF blob loaded into the kernel.
+type BTFID uint32
+
+// TypeID identifies a type in a BTF blob.
+type TypeID uint32
+
+// MapFlags control map behaviour.
+//
+// Every constant below is a single bit. Bits are handed out in declaration
+// order starting with bit 0, so reordering the list changes the values.
+// NOTE(review): the values presumably have to line up with the kernel's
+// BPF_F_* map flags; confirm before adding or moving entries.
+type MapFlags uint32
+
+//go:generate stringer -type MapFlags
+
+const (
+	BPF_F_NO_PREALLOC MapFlags = 1 << iota
+	BPF_F_NO_COMMON_LRU
+	BPF_F_NUMA_NODE
+	BPF_F_RDONLY
+	BPF_F_WRONLY
+	BPF_F_STACK_BUILD_ID
+	BPF_F_ZERO_SEED
+	BPF_F_RDONLY_PROG
+	BPF_F_WRONLY_PROG
+	BPF_F_CLONE
+	BPF_F_MMAPABLE
+	BPF_F_PRESERVE_ELEMS
+	BPF_F_INNER_MAP
+)
+
+// wrappedErrno wraps syscall.Errno to prevent direct comparisons with
+// syscall.E* or unix.E* constants.
+//
+// You should never export an error of this type.
+type wrappedErrno struct {
+	syscall.Errno
+}
+
+// Unwrap returns the wrapped errno so that it stays reachable through
+// errors.Is and errors.As.
+func (we wrappedErrno) Unwrap() error {
+	return we.Errno
+}
+
+// Error returns the message of the wrapped errno, except for ENOTSUPP which
+// is rendered as "operation not supported".
+func (we wrappedErrno) Error() string {
+	switch we.Errno {
+	case ENOTSUPP:
+		return "operation not supported"
+	default:
+		return we.Errno.Error()
+	}
+}
+
+// syscallError pairs an error with the errno that caused it.
+//
+// The embedded error provides the message and is matched by Is, while the
+// errno is what Unwrap returns.
+type syscallError struct {
+	error
+	errno syscall.Errno
+}
+
+// Error returns an error which reads like err, and for which errors.Is
+// reports true for both err and errno.
+func Error(err error, errno syscall.Errno) error {
+	se := syscallError{error: err, errno: errno}
+	return &se
+}
+
+// Is reports whether target is the error passed to Error.
+func (se *syscallError) Is(target error) bool {
+	return se.error == target
+}
+
+// Unwrap returns the errno passed to Error.
+func (se *syscallError) Unwrap() error {
+	return se.errno
+}
@@ -0,0 +1,61 @@
+package sys
+
+import (
+	"errors"
+	"testing"
+
+	"github.com/cilium/ebpf/internal/unix"
+
+	qt "github.com/frankban/quicktest"
+)
+
+// TestObjName checks that NewObjName truncates over-long names while keeping
+// the result null-terminated and of the expected length.
+func TestObjName(t *testing.T) {
+	name := NewObjName("more_than_16_characters_long")
+	if name[len(name)-1] != 0 {
+		// The message used to name a function which doesn't exist.
+		t.Error("NewObjName doesn't null terminate")
+	}
+	if len(name) != unix.BPF_OBJ_NAME_LEN {
+		t.Errorf("Name is %d instead of %d bytes long", len(name), unix.BPF_OBJ_NAME_LEN)
+	}
+}
+
+// TestWrappedErrno checks that a wrappedErrno is only equal to its errno via
+// errors.Is, and that ENOTSUPP gets a readable message.
+func TestWrappedErrno(t *testing.T) {
+	var (
+		wrapped error = wrappedErrno{unix.EINVAL}
+		plain   error = unix.EINVAL
+	)
+
+	// Direct comparison must fail, that is the point of the wrapper.
+	if wrapped == plain {
+		t.Error("wrappedErrno is comparable to plain errno")
+	}
+
+	if matches := errors.Is(wrapped, plain); !matches {
+		t.Error("errors.Is(wrappedErrno, errno) returns false")
+	}
+
+	if matches := errors.Is(wrapped, unix.EAGAIN); matches {
+		t.Error("errors.Is(wrappedErrno, EAGAIN) returns true")
+	}
+
+	msg := wrappedErrno{ENOTSUPP}.Error()
+	qt.Assert(t, msg, qt.Contains, "operation not supported")
+}
+
+// TestSyscallError checks that an error created by Error matches both of its
+// parts via errors.Is, but not the other way around.
+func TestSyscallError(t *testing.T) {
+	inner := errors.New("foo")
+	combined := Error(inner, unix.EINVAL)
+
+	// The combined error is both of its parts...
+	if ok := errors.Is(combined, unix.EINVAL); !ok {
+		t.Error("SyscallError is not the wrapped errno")
+	}
+	if ok := errors.Is(combined, inner); !ok {
+		t.Error("SyscallError is not the wrapped error")
+	}
+
+	// ...but neither part is the combined error.
+	if ok := errors.Is(unix.EINVAL, combined); ok {
+		t.Error("Errno is the SyscallError")
+	}
+	if ok := errors.Is(inner, combined); ok {
+		t.Error("Error is the SyscallError")
+	}
+}
@@ -0,0 +1,21 @@
+package testutils
+
+import (
+	"os"
+	"testing"
+)
+
+// TempBPFFS creates a temporary directory below /sys/fs/bpf and returns its
+// path.
+//
+// The directory is removed again via tb.Cleanup. The test is failed if the
+// directory can't be created.
+func TempBPFFS(tb testing.TB) string {
+	tb.Helper()
+
+	dir, err := os.MkdirTemp("/sys/fs/bpf", "ebpf-test")
+	if err != nil {
+		tb.Fatal("Create temporary directory on BPFFS:", err)
+	}
+
+	tb.Cleanup(func() {
+		// Removal is best effort, the error is dropped.
+		_ = os.RemoveAll(dir)
+	})
+
+	return dir
+}
@@ -0,0 +1,65 @@
+package testutils
+
+import (
+	"errors"
+	"os"
+	"strings"
+	"testing"
+
+	"github.com/cilium/ebpf/internal"
+	"github.com/cilium/ebpf/internal/unix"
+)
+
+// cgroup2Path returns the mount point of the first cgroup2 file system listed
+// in /proc/mounts.
+//
+// Lines of /proc/mounts have the form "<source> <mount point> <fs type> ...".
+// The file system type is what identifies a cgroup2 mount; the source is a
+// free form label and doesn't have to read "cgroup2".
+//
+// The lookup is wrapped in internal.Memoize, presumably so that the file is
+// only parsed once.
+var cgroup2Path = internal.Memoize(func() (string, error) {
+	mounts, err := os.ReadFile("/proc/mounts")
+	if err != nil {
+		return "", err
+	}
+
+	for _, line := range strings.Split(string(mounts), "\n") {
+		fields := strings.Fields(line)
+		if len(fields) < 3 {
+			// Empty remainder after the final newline, or a malformed line.
+			continue
+		}
+
+		if fields[2] == "cgroup2" {
+			return fields[1], nil
+		}
+	}
+
+	return "", errors.New("cgroup2 not mounted")
+})
+
+// CreateCgroup creates a temporary directory below the cgroup2 mount and
+// returns it opened as a file.
+//
+// When the test ends the file is closed first and the directory removed
+// afterwards. Any failure along the way fails the test.
+func CreateCgroup(tb testing.TB) *os.File {
+	tb.Helper()
+
+	root, err := cgroup2Path()
+	if err != nil {
+		tb.Fatal("Can't locate cgroup2 mount:", err)
+	}
+
+	dir, err := os.MkdirTemp(root, "ebpf-link")
+	if err != nil {
+		tb.Fatal("Can't create cgroupv2:", err)
+	}
+	// Cleanups run last in, first out: registering the removal before the
+	// close below makes sure the directory is closed before it is removed.
+	tb.Cleanup(func() { os.Remove(dir) })
+
+	f, err := os.Open(dir)
+	if err != nil {
+		tb.Fatal(err)
+	}
+	tb.Cleanup(func() { f.Close() })
+
+	return f
+}
+
+// GetCgroupIno returns the inode number of cgroup as reported by fstat.
+//
+// The test is failed if the file can't be stat'ed.
+func GetCgroupIno(t *testing.T, cgroup *os.File) uint64 {
+	// Attribute failures to the calling test, like the other helpers do.
+	t.Helper()
+
+	cgroupStat := unix.Stat_t{}
+	err := unix.Fstat(int(cgroup.Fd()), &cgroupStat)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	return cgroupStat.Ino
+}
@@ -0,0 +1,34 @@
+package fdtrace
+
+import (
+	"fmt"
+	"os"
+	"runtime"
+	"testing"
+
+	"github.com/cilium/ebpf/internal/sys"
+)
+
+// TestMain runs m with sys.FD leak tracing enabled.
+//
+// The process exits with code 99 if a leak was reported, otherwise with the
+// result of m.Run.
+func TestMain(m *testing.M) {
+	// The callback is invoked either asynchronously by the gc, or while the
+	// leak tracer is being disabled further down. It only records that a leak
+	// happened instead of exiting on the spot. This avoids races and gives all
+	// events the chance to be written to stderr.
+	var leaked bool
+	report := func(fs *runtime.Frames) {
+		fmt.Fprintln(os.Stderr, "leaked fd created at:")
+		fmt.Fprintln(os.Stderr, sys.FormatFrames(fs))
+		leaked = true
+	}
+
+	sys.OnLeakFD(report)
+	code := m.Run()
+	sys.OnLeakFD(nil)
+
+	if leaked {
+		code = 99
+	}
+	os.Exit(code)
+}
@@ -0,0 +1,139 @@
+package testutils
+
+import (
+	"errors"
+	"os"
+	"strings"
+	"testing"
+
+	"github.com/cilium/ebpf/internal"
+)
+
+const (
+	// ignoreKernelVersionEnvVar names the environment variable which lists,
+	// separated by commas, the tests whose kernel version check is skipped.
+	ignoreKernelVersionEnvVar = "EBPF_TEST_IGNORE_KERNEL_VERSION"
+)
+
+// CheckFeatureTest invokes the feature test fn and passes its result to
+// checkFeatureTestError, which decides whether t passes, fails or is skipped.
+func CheckFeatureTest(t *testing.T, fn func() error) {
+	// Attribute failures to the calling test instead of this wrapper.
+	t.Helper()
+
+	checkFeatureTestError(t, fn())
+}
+
+// checkFeatureTestError evaluates the result of a feature test.
+//
+//   - A nil error passes.
+//   - An error which isn't an UnsupportedFeatureError marks t as failed.
+//   - An UnsupportedFeatureError skips t if the test name is listed in
+//     EBPF_TEST_IGNORE_KERNEL_VERSION, and is otherwise checked against the
+//     running kernel by checkKernelVersion.
+func checkFeatureTestError(t *testing.T, err error) {
+	// Attribute failures to the caller rather than to this helper.
+	t.Helper()
+
+	if err == nil {
+		return
+	}
+
+	var ufe *internal.UnsupportedFeatureError
+	if !errors.As(err, &ufe) {
+		t.Error("Feature test failed:", err)
+		return
+	}
+
+	if ignoreKernelVersionCheck(t.Name()) {
+		// Skipf doesn't return, the version check below is not reached.
+		t.Skipf("Ignoring error due to %s: %s", ignoreKernelVersionEnvVar, ufe.Error())
+	}
+
+	checkKernelVersion(t, ufe)
+}
+
+// CheckFeatureMatrix runs one sub-test per entry of fm, named after the
+// entry, and evaluates the entry's result with checkFeatureTestError.
+func CheckFeatureMatrix[K comparable](t *testing.T, fm internal.FeatureMatrix[K]) {
+	t.Helper()
+
+	for key, ft := range fm {
+		check := func(t *testing.T) {
+			checkFeatureTestError(t, fm.Result(key))
+		}
+		t.Run(ft.Name, check)
+	}
+}
+
+// SkipIfNotSupported skips the test if err indicates a missing feature.
+//
+// A bare internal.ErrNotSupported is a failure, since the error is expected
+// to be wrapped. An UnsupportedFeatureError is checked against the running
+// kernel before skipping. Any other error, including nil, is ignored.
+func SkipIfNotSupported(tb testing.TB, err error) {
+	tb.Helper()
+
+	if err == internal.ErrNotSupported {
+		tb.Fatal("Unwrapped ErrNotSupported")
+	}
+
+	var ufe *internal.UnsupportedFeatureError
+	switch {
+	case errors.As(err, &ufe):
+		checkKernelVersion(tb, ufe)
+		tb.Skip(ufe.Error())
+
+	case errors.Is(err, internal.ErrNotSupported):
+		tb.Skip(err.Error())
+	}
+}
+
+// checkKernelVersion fails the test if ufe claims a minimum kernel version
+// which the running kernel already satisfies.
+//
+// Nothing is checked if ufe doesn't specify a minimum version.
+func checkKernelVersion(tb testing.TB, ufe *internal.UnsupportedFeatureError) {
+	// Mark as helper up front so that failures raised further down the call
+	// chain are attributed to the caller as well.
+	tb.Helper()
+
+	if ufe.MinimumVersion.Unspecified() {
+		return
+	}
+
+	if !isKernelLessThan(tb, ufe.MinimumVersion) {
+		tb.Fatalf("Feature '%s' isn't supported even though kernel is newer than %s",
+			ufe.Name, ufe.MinimumVersion)
+	}
+}
+
+// SkipOnOldKernel skips the test if the running kernel is older than
+// minVersion. feature is only used in the skip message.
+func SkipOnOldKernel(tb testing.TB, minVersion, feature string) {
+	tb.Helper()
+
+	if !IsKernelLessThan(tb, minVersion) {
+		return
+	}
+
+	tb.Skipf("Test requires at least kernel %s (due to missing %s)", minVersion, feature)
+}
+
+// IsKernelLessThan reports whether the running kernel is older than
+// minVersion. The test is failed if minVersion can't be parsed.
+func IsKernelLessThan(tb testing.TB, minVersion string) bool {
+	tb.Helper()
+
+	parsed, err := internal.NewVersion(minVersion)
+	if err != nil {
+		tb.Fatalf("Invalid version %s: %s", minVersion, err)
+	}
+
+	return isKernelLessThan(tb, parsed)
+}
+
+// isKernelLessThan reports whether the running kernel is older than minv.
+//
+// If CI_MAX_KERNEL_VERSION is set, the test is failed when that version is
+// itself older than minv, since the test could then never run on CI.
+func isKernelLessThan(tb testing.TB, minv internal.Version) bool {
+	tb.Helper()
+
+	ciMax := os.Getenv("CI_MAX_KERNEL_VERSION")
+	if ciMax != "" {
+		maxv, err := internal.NewVersion(ciMax)
+		if err != nil {
+			tb.Fatalf("Invalid version %q in CI_MAX_KERNEL_VERSION: %s", ciMax, err)
+		}
+
+		if maxv.Less(minv) {
+			tb.Fatalf("Test for %s will never execute on CI since %s is the most recent kernel", minv, maxv)
+		}
+	}
+
+	running := kernelVersion(tb)
+	return running.Less(minv)
+}
+
+// kernelVersion returns the result of internal.KernelVersion and fails the
+// test if it can't be determined.
+func kernelVersion(tb testing.TB) internal.Version {
+	tb.Helper()
+
+	ver, err := internal.KernelVersion()
+	if err != nil {
+		tb.Fatal(err)
+	}
+
+	return ver
+}
+
+// ignoreKernelVersionCheck reports whether the kernel version check should be
+// skipped for the test called tName.
+//
+// The decision is based on the environment variable
+// EBPF_TEST_IGNORE_KERNEL_VERSION, which holds a comma (,) separated list of
+// test names. Whitespace around the individual names is ignored. An unset or
+// empty variable never matches.
+//
+// eg: EBPF_TEST_IGNORE_KERNEL_VERSION=TestABC,TestXYZ
+func ignoreKernelVersionCheck(tName string) bool {
+	list := os.Getenv(ignoreKernelVersionEnvVar)
+	if list == "" {
+		return false
+	}
+
+	for _, entry := range strings.Split(list, ",") {
+		if tName == strings.TrimSpace(entry) {
+			return true
+		}
+	}
+
+	return false
+}
@@ -0,0 +1,54 @@
+package testutils
+
+import (
+	"testing"
+)
+
+// TestIgnoreKernelVersionCheckWhenEnvVarIsSet runs ignoreKernelVersionCheck
+// against several values of the environment variable.
+func TestIgnoreKernelVersionCheckWhenEnvVarIsSet(t *testing.T) {
+	type testCase struct {
+		name     string
+		env      string
+		testName string
+		want     bool
+	}
+
+	cases := []testCase{
+		{
+			name:     "should NOT ignore kernel version check if environment var set to empty string",
+			env:      "",
+			testName: "TestABC",
+			want:     false,
+		},
+		{
+			name:     "should ignore kernel version check if environment var set to skip test name with single value",
+			env:      "TestABC",
+			testName: "TestABC",
+			want:     true,
+		},
+		{
+			name:     "should match test name when multiple comma separated names list is provided",
+			env:      "TestABC,TestXYZ",
+			testName: "TestXYZ",
+			want:     true,
+		},
+		{
+			name:     "should NOT match test name when multiple comma separated names list is provided but name is not present in list",
+			env:      "TestABC,TestXYZ",
+			testName: "TestPQR",
+			want:     false,
+		},
+		{
+			name:     "should match test name if names list has leading/trailing spaces",
+			env:      "TestABC, TestXYZ , TestPQR",
+			testName: "TestXYZ",
+			want:     true,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Setenv restores the previous value once the sub-test is done.
+			t.Setenv(ignoreKernelVersionEnvVar, tc.env)
+
+			got := ignoreKernelVersionCheck(tc.testName)
+			if got != tc.want {
+				t.Errorf("ignoreKernelVersionCheck() = %v, want %v", got, tc.want)
+			}
+		})
+	}
+}
@@ -0,0 +1,58 @@
+package testutils
+
+import (
+	"path/filepath"
+	"testing"
+)
+
+// Files calls fn for each given file.
+//
+// Every call happens in its own sub-test, named after the base name of the
+// file. The test is failed if files is empty.
+func Files(t *testing.T, files []string, fn func(*testing.T, string)) {
+	t.Helper()
+
+	if len(files) == 0 {
+		t.Fatalf("No files given")
+	}
+
+	for i := range files {
+		// Bind the path per iteration, so that the closure below can never
+		// observe a later element.
+		path := files[i]
+		t.Run(filepath.Base(path), func(t *testing.T) {
+			fn(t, path)
+		})
+	}
+}
+
+// Glob finds files matching a pattern.
+//
+// The pattern may include a full path. Excludes use the same syntax as
+// pattern, but are only applied to the basename instead of the full path.
+//
+// The test is failed if the pattern or one of the excludes is malformed.
+func Glob(tb testing.TB, pattern string, excludes ...string) []string {
+	tb.Helper()
+
+	matches, err := filepath.Glob(pattern)
+	if err != nil {
+		tb.Fatal("Can't glob files:", err)
+	}
+
+	if len(excludes) == 0 {
+		return matches
+	}
+
+	// isExcluded reports whether base matches any of the exclude patterns.
+	isExcluded := func(base string) bool {
+		tb.Helper()
+
+		for _, exclude := range excludes {
+			ok, err := filepath.Match(exclude, base)
+			if err != nil {
+				tb.Fatal(err)
+			}
+			if ok {
+				return true
+			}
+		}
+		return false
+	}
+
+	var kept []string
+	for _, file := range matches {
+		if !isExcluded(filepath.Base(file)) {
+			kept = append(kept, file)
+		}
+	}
+
+	return kept
+}
@@ -0,0 +1,16 @@
+package testutils
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/cilium/ebpf/rlimit"
+)
+
+// init lifts the memlock limit for every test binary importing this package.
+func init() {
+	// Increase the memlock for all tests unconditionally. It's a great source of
+	// weird bugs, since different distros have different default limits.
+	if err := rlimit.RemoveMemlock(); err != nil {
+		// Don't abort: tests may still work with the default limit. Do include
+		// the cause though, otherwise the warning can't be acted upon.
+		fmt.Fprintln(os.Stderr, "WARNING: Failed to adjust rlimit, tests may fail:", err)
+	}
+}
@@ -0,0 +1,21 @@
+package testutils
+
+import (
+	"fmt"
+	"math/rand"
+	"sync"
+	"time"
+)
+
+// randSeed holds the seed shared by every generator returned from Rand.
+// The value is set exactly once, guarded by once.
+var randSeed struct {
+	value int64
+	once  sync.Once
+}
+
+// Rand returns a new random number generator.
+//
+// All generators returned by Rand are seeded with the same value. The seed is
+// taken from the current time on the first call and printed to stdout.
+func Rand() *rand.Rand {
+	randSeed.once.Do(func() {
+		seed := time.Now().UnixMicro()
+		randSeed.value = seed
+		fmt.Printf("Random seed is %d\n", seed)
+	})
+
+	src := rand.NewSource(randSeed.value)
+	return rand.New(src)
+}
@@ -0,0 +1,359 @@
+package tracefs
+
+import (
+	"crypto/rand"
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+	"syscall"
+
+	"github.com/cilium/ebpf/internal"
+	"github.com/cilium/ebpf/internal/unix"
+)
+
+var (
+	// ErrInvalidInput is wrapped by errors which report unusable input, like
+	// an invalid group prefix or a path escaping tracefs.
+	ErrInvalidInput = errors.New("invalid input")
+
+	// ErrInvalidMaxActive is returned by NewEvent if RetprobeMaxActive is set
+	// for anything other than a kretprobe.
+	ErrInvalidMaxActive = errors.New("can only set maxactive on kretprobes")
+)
+
+//go:generate stringer -type=ProbeType -linecomment
+
+// ProbeType selects the kind of probe, and with it the tracefs events file
+// that is written to.
+//
+// The line comments on the constants are picked up by stringer (see the
+// -linecomment flag above) and end up in the "<type>_events" file name, so
+// they are not free form.
+type ProbeType uint8
+
+const (
+	Kprobe ProbeType = iota // kprobe
+	Uprobe                  // uprobe
+)
+
+// eventsFile opens the "<type>_events" file of pt below the tracefs mount,
+// write-only and in append mode.
+//
+// The caller is responsible for closing the file.
+func (pt ProbeType) eventsFile() (*os.File, error) {
+	name := pt.String() + "_events"
+
+	path, err := sanitizeTracefsPath(name)
+	if err != nil {
+		return nil, err
+	}
+
+	const flags = os.O_APPEND | os.O_WRONLY
+	return os.OpenFile(path, flags, 0666)
+}
+
+// ProbeArgs describes the probe to create via NewEvent.
+//
+//   - Type selects between kprobe and uprobe.
+//   - Symbol is sanitized into the event name, and is the SYM part of a
+//     kprobe token.
+//   - Group is the tracefs group of the event.
+//   - Path is the PATH part of a uprobe token.
+//   - Offset is appended to the kprobe symbol, or used as the uprobe OFFSET.
+//   - RefCtrOffset, if non-zero, is appended to the uprobe token in
+//     parentheses.
+//   - Ret requests a return probe.
+//   - RetprobeMaxActive sets MAXACTIVE and is only accepted for kretprobes.
+//   - Cookie and Pid are not read by the code in this file.
+//     NOTE(review): presumably consumed by callers; confirm.
+type ProbeArgs struct {
+	Type                         ProbeType
+	Symbol, Group, Path          string
+	Offset, RefCtrOffset, Cookie uint64
+	Pid, RetprobeMaxActive       int
+	Ret                          bool
+}
+
+// RandomGroup generates a random string for use as a tracefs group name, of
+// the form "<prefix>_<16 hex characters>".
+//
+// Returns an error wrapping ErrInvalidInput when prefix is rejected by
+// validIdentifier or when the output string would exceed 63 characters
+// (kernel limitation). Also returns an error when rand.Read() fails.
+func RandomGroup(prefix string) (string, error) {
+	// Longest group name accepted, see above.
+	const maxLen = 63
+
+	if !validIdentifier(prefix) {
+		return "", fmt.Errorf("prefix '%s' must be alphanumeric or underscore: %w", prefix, ErrInvalidInput)
+	}
+
+	var suffix [8]byte
+	if _, err := rand.Read(suffix[:]); err != nil {
+		return "", fmt.Errorf("reading random bytes: %w", err)
+	}
+
+	group := fmt.Sprintf("%s_%x", prefix, suffix[:])
+	if len(group) > maxLen {
+		return "", fmt.Errorf("group name '%s' cannot be longer than 63 characters: %w", group, ErrInvalidInput)
+	}
+
+	return group, nil
+}
+
+// validIdentifier implements the equivalent of a regex match
+// against "^[a-zA-Z_][0-9a-zA-Z_]*$".
+//
+// Trace event groups, names and kernel symbols must adhere to this set
+// of characters. Non-empty, first character must not be a number, all
+// characters must be alphanumeric or underscore.
+func validIdentifier(s string) bool {
+	if s == "" {
+		return false
+	}
+
+	for i := 0; i < len(s); i++ {
+		c := s[i]
+
+		letter := (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'
+		digit := c >= '0' && c <= '9'
+
+		// Digits are fine everywhere but in the leading position.
+		if letter || (digit && i > 0) {
+			continue
+		}
+		return false
+	}
+
+	return true
+}
+
+// sanitizeTracefsPath joins path onto the tracefs mount point and returns the
+// cleaned result.
+//
+// Returns an error wrapping ErrInvalidInput if the result would end up
+// outside of the mount point, for example due to ".." elements. Errors from
+// locating the mount point are returned as is.
+func sanitizeTracefsPath(path ...string) (string, error) {
+	base, err := getTracefsPath()
+	if err != nil {
+		return "", err
+	}
+	l := filepath.Join(path...)
+	p := filepath.Join(base, l)
+
+	// A plain string prefix check also accepts siblings of the mount point
+	// which merely start with the same characters, e.g. "<base>_x" reached
+	// via "../<name>_x". Accept the mount point itself, or anything below it
+	// as delimited by a path separator.
+	root := filepath.Clean(base)
+	if p != root && !strings.HasPrefix(p, root+string(filepath.Separator)) {
+		return "", fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, ErrInvalidInput)
+	}
+	return p, nil
+}
+
+// getTracefsPath returns the path of the tracefs mount point.
+// Since kernel 4.1 tracefs should be mounted by default at /sys/kernel/tracing,
+// but may also be available at /sys/kernel/debug/tracing if debugfs is mounted.
+// Which of the paths are available depends on distribution choices.
+//
+// Candidates are probed in order, the first one whose file system type
+// matches wins.
+var getTracefsPath = internal.Memoize(func() (string, error) {
+	type candidate struct {
+		path   string
+		fsType int64
+	}
+
+	candidates := []candidate{
+		{"/sys/kernel/tracing", unix.TRACEFS_MAGIC},
+		{"/sys/kernel/debug/tracing", unix.TRACEFS_MAGIC},
+		// RHEL/CentOS
+		{"/sys/kernel/debug/tracing", unix.DEBUGFS_MAGIC},
+	}
+
+	for _, c := range candidates {
+		fsType, err := internal.FSType(c.path)
+		if err != nil || fsType != c.fsType {
+			continue
+		}
+		return c.path, nil
+	}
+
+	return "", errors.New("neither debugfs nor tracefs are mounted")
+})
+
+// sanitizeIdentifier replaces every run of characters which are invalid for
+// the tracefs api with a single underscore.
+//
+// It is equivalent to calling
+// regexp.MustCompile("[^a-zA-Z0-9]+").ReplaceAllString(s, "_").
+func sanitizeIdentifier(s string) string {
+	var out strings.Builder
+	out.Grow(len(s))
+
+	// inRun is true while in a run of invalid characters for which the
+	// underscore has already been written.
+	inRun := false
+	for _, c := range s {
+		valid := (c >= 'a' && c <= 'z') ||
+			(c >= 'A' && c <= 'Z') ||
+			(c >= '0' && c <= '9')
+
+		switch {
+		case valid:
+			inRun = false
+			out.WriteRune(c)
+
+		case !inRun:
+			inRun = true
+			out.WriteByte('_')
+		}
+	}
+
+	return out.String()
+}
+
+// EventID reads a trace event's ID from tracefs given its group and name.
+// The kernel requires group and name to be alphanumeric or underscore.
+//
+// A missing event is reported by returning the error of the read unchanged,
+// so that it can be tested with errors.Is(err, os.ErrNotExist).
+func EventID(group, name string) (uint64, error) {
+	if !validIdentifier(group) {
+		return 0, fmt.Errorf("invalid tracefs group: %q", group)
+	}
+	if !validIdentifier(name) {
+		return 0, fmt.Errorf("invalid tracefs name: %q", name)
+	}
+
+	idFile, err := sanitizeTracefsPath("events", group, name, "id")
+	if err != nil {
+		return 0, err
+	}
+
+	id, err := internal.ReadUint64FromFile("%d\n", idFile)
+	switch {
+	case err == nil:
+		return id, nil
+
+	case errors.Is(err, os.ErrNotExist):
+		return 0, err
+
+	default:
+		return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err)
+	}
+}
+
+// probePrefix returns the leading token of a probe definition: "p" for a
+// regular probe, "r" for a return probe and "r<maxActive>" for a return probe
+// with a positive maxActive.
+func probePrefix(ret bool, maxActive int) string {
+	switch {
+	case !ret:
+		return "p"
+	case maxActive > 0:
+		return fmt.Sprintf("r%d", maxActive)
+	default:
+		return "r"
+	}
+}
+
+// Event represents an entry in a tracefs probe events file.
+//
+// typ selects the events file the entry was written to, and group and name
+// form the "<group>/<name>" handle which Close uses to remove the entry.
+type Event struct {
+	typ         ProbeType
+	group, name string
+	// event id allocated by the kernel. 0 if the event has already been removed.
+	id uint64
+}
+
+// NewEvent creates a new ephemeral trace event.
+//
+// The event name is derived from args.Symbol via sanitizeIdentifier. The
+// returned Event carries a finalizer which calls Close.
+//
+// Returns os.ErrNotExist if symbol is not a valid
+// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist
+// if a probe with the same group and symbol already exists. Returns an error if
+// args.RetprobeMaxActive is used on non kprobe types. Returns ErrNotSupported if
+// the kernel is too old to support kretprobe maxactive.
+func NewEvent(args ProbeArgs) (*Event, error) {
+	// Before attempting to create a trace event through tracefs,
+	// check if an event with the same group and name already exists.
+	// Kernels 4.x and earlier don't return os.ErrExist on writing a duplicate
+	// entry, so we need to rely on reads for detecting uniqueness.
+	eventName := sanitizeIdentifier(args.Symbol)
+	_, err := EventID(args.Group, eventName)
+	if err == nil {
+		return nil, fmt.Errorf("trace event %s/%s: %w", args.Group, eventName, os.ErrExist)
+	}
+	// err is known to be non-nil at this point, only os.ErrNotExist is
+	// acceptable.
+	if err != nil && !errors.Is(err, os.ErrNotExist) {
+		return nil, fmt.Errorf("checking trace event %s/%s: %w", args.Group, eventName, err)
+	}
+
+	// Open the [k,u]probe_events file in tracefs, depending on args.Type.
+	f, err := args.Type.eventsFile()
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	// pe is the full line written to the events file, token is the part of it
+	// which identifies the probed location.
+	var pe, token string
+	switch args.Type {
+	case Kprobe:
+		// The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt):
+		// p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
+		// r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
+		// -:[GRP/]EVENT                                        : Clear a probe
+		//
+		// Some examples:
+		// r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy
+		// p:ebpf_5678/p_my_kprobe __x64_sys_execve
+		//
+		// Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the
+		// kernel default to NR_CPUS. This is desired in most eBPF cases since
+		// subsampling or rate limiting logic can be more accurately implemented in
+		// the eBPF program itself.
+		// See Documentation/kprobes.txt for more details.
+		if args.RetprobeMaxActive != 0 && !args.Ret {
+			return nil, ErrInvalidMaxActive
+		}
+		token = KprobeToken(args)
+		pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.Ret, args.RetprobeMaxActive), args.Group, eventName, token)
+	case Uprobe:
+		// The uprobe_events syntax is as follows:
+		// p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe
+		// r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return probe
+		// -:[GRP/]EVENT                           : Clear a probe
+		//
+		// Some examples:
+		// r:ebpf_1234/readline /bin/bash:0x12345
+		// p:ebpf_5678/main_mySymbol /bin/mybin:0x12345(0x123)
+		//
+		// See Documentation/trace/uprobetracer.txt for more details.
+		if args.RetprobeMaxActive != 0 {
+			return nil, ErrInvalidMaxActive
+		}
+		token = UprobeToken(args)
+		pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.Ret, 0), args.Group, eventName, token)
+	}
+	// The write error is classified below before being returned.
+	_, err = f.WriteString(pe)
+
+	// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
+	// when trying to create a retprobe for a missing symbol.
+	if errors.Is(err, os.ErrNotExist) {
+		return nil, fmt.Errorf("token %s: not found: %w", token, err)
+	}
+	// Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved
+	// to an invalid insn boundary. The exact conditions that trigger this error are
+	// arch specific however.
+	if errors.Is(err, syscall.EILSEQ) {
+		return nil, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)
+	}
+	// ERANGE is returned when the `SYM[+offs]` token is too big and cannot
+	// be resolved.
+	if errors.Is(err, syscall.ERANGE) {
+		return nil, fmt.Errorf("token %s: offset too big: %w", token, os.ErrNotExist)
+	}
+
+	if err != nil {
+		return nil, fmt.Errorf("token %s: writing '%s': %w", token, pe, err)
+	}
+
+	// Get the newly-created trace event's id.
+	tid, err := EventID(args.Group, eventName)
+	if args.RetprobeMaxActive != 0 && errors.Is(err, os.ErrNotExist) {
+		// Kernels < 4.12 don't support maxactive and therefore auto generate
+		// group and event names from the symbol and offset. The symbol is used
+		// without any sanitization.
+		// See https://elixir.bootlin.com/linux/v4.10/source/kernel/trace/trace_kprobe.c#L712
+		event := fmt.Sprintf("kprobes/r_%s_%d", args.Symbol, args.Offset)
+		if err := removeEvent(args.Type, event); err != nil {
+			// NOTE(review): %s instead of %w looks deliberate. Wrapping could
+			// leak os.ErrNotExist, which callers take to mean that the symbol
+			// doesn't exist. Confirm before changing.
+			return nil, fmt.Errorf("failed to remove spurious maxactive event: %s", err)
+		}
+		return nil, fmt.Errorf("create trace event with non-default maxactive: %w", internal.ErrNotSupported)
+	}
+	if err != nil {
+		return nil, fmt.Errorf("get trace event id: %w", err)
+	}
+
+	evt := &Event{args.Type, args.Group, eventName, tid}
+	// Remove the event from tracefs if the Event is garbage collected without
+	// having been closed.
+	runtime.SetFinalizer(evt, (*Event).Close)
+	return evt, nil
+}
+
+// Close removes the event from tracefs.
+//
+// Returns os.ErrClosed if the event has already been closed before.
+func (evt *Event) Close() error {
+	if evt.id == 0 {
+		return os.ErrClosed
+	}
+
+	// Mark the event as closed and drop the finalizer before touching tracefs.
+	// The event counts as closed even if the removal below fails.
+	evt.id = 0
+	runtime.SetFinalizer(evt, nil)
+
+	return removeEvent(evt.typ, evt.group+"/"+evt.name)
+}
+
+// removeEvent deletes the probe pe, given as "<group>/<name>", by writing a
+// removal line to the events file belonging to typ.
+func removeEvent(typ ProbeType, pe string) error {
+	f, err := typ.eventsFile()
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	// A removal has the form "-:[GRP/]EVENT" for both kprobe_events and
+	// uprobe_events. The probe type does not need to be specified.
+	_, err = f.WriteString("-:" + pe)
+	if err == nil {
+		return nil
+	}
+
+	return fmt.Errorf("remove event %q from %s: %w", pe, f.Name(), err)
+}
+
+// ID returns the tracefs ID associated with the event.
+//
+// The ID is 0 once the event has been closed.
+func (evt *Event) ID() uint64 {
+	return evt.id
+}
+
+// Group returns the tracefs group used by the event.
+func (evt *Event) Group() string {
+	return evt.group
+}
+
+// KprobeToken creates the SYM[+offs] token for the tracefs api.
+//
+// The offset is rendered in hexadecimal with a 0x prefix and omitted
+// entirely if it is zero.
+func KprobeToken(args ProbeArgs) string {
+	if args.Offset == 0 {
+		return args.Symbol
+	}
+
+	return fmt.Sprintf("%s+%#x", args.Symbol, args.Offset)
+}
@@ -0,0 +1,79 @@
+package tracefs
+
+import (
+	"fmt"
+	"os"
+	"testing"
+
+	qt "github.com/frankban/quicktest"
+)
+
+// Global symbol, present on all tested kernels.
+// Used as the probe target of the Kprobe cases in TestNewEvent.
+const ksym = "vprintk"
+
+// TestKprobeTraceFSGroup checks the format of names generated by RandomGroup
+// as well as the rejection of over-long and invalid prefixes.
+func TestKprobeTraceFSGroup(t *testing.T) {
+	c := qt.New(t)
+
+	// Expect <prefix>_<16 random hex chars>.
+	group, err := RandomGroup("ebpftest")
+	c.Assert(err, qt.IsNil)
+	c.Assert(group, qt.Matches, `ebpftest_[a-f0-9]{16}`)
+
+	// Expect error when the generator's output exceeds 63 characters:
+	// 47 characters of prefix plus 17 for the underscore and the random
+	// suffix add up to 64.
+	var long [47]byte
+	for i := range long {
+		long[i] = 'a'
+	}
+	_, err = RandomGroup(string(long[:]))
+	c.Assert(err, qt.Not(qt.IsNil))
+
+	// Reject non-alphanumeric characters.
+	_, err = RandomGroup("/")
+	c.Assert(err, qt.Not(qt.IsNil))
+}
+
+// TestKprobeToken checks the SYM[+offs] rendering for a few offsets.
+func TestKprobeToken(t *testing.T) {
+	type testCase struct {
+		args     ProbeArgs
+		expected string
+	}
+
+	cases := []testCase{
+		{ProbeArgs{Symbol: "symbol"}, "symbol"},
+		{ProbeArgs{Symbol: "symbol", Offset: 1}, "symbol+0x1"},
+		{ProbeArgs{Symbol: "symbol", Offset: 65535}, "symbol+0xffff"},
+		{ProbeArgs{Symbol: "symbol", Offset: 65536}, "symbol+0x10000"},
+	}
+
+	for idx, tc := range cases {
+		t.Run(fmt.Sprint(idx), func(t *testing.T) {
+			if got := KprobeToken(tc.args); got != tc.expected {
+				t.Errorf("Expected symbol+offset to be '%s', got '%s'", tc.expected, got)
+			}
+		})
+	}
+}
+
+// TestNewEvent creates kprobe and uprobe events, both regular and return
+// probes, and checks duplicate detection as well as repeated closing.
+func TestNewEvent(t *testing.T) {
+	for _, args := range []ProbeArgs{
+		{Type: Kprobe, Symbol: ksym},
+		{Type: Kprobe, Symbol: ksym, Ret: true},
+		{Type: Uprobe, Path: "/bin/bash", Symbol: "main"},
+		{Type: Uprobe, Path: "/bin/bash", Symbol: "main", Ret: true},
+	} {
+		name := fmt.Sprintf("%s ret=%v", args.Type, args.Ret)
+		t.Run(name, func(t *testing.T) {
+			// Don't carry on with an empty group if the generator fails.
+			group, err := RandomGroup("ebpftest")
+			qt.Assert(t, err, qt.IsNil)
+			args.Group = group
+
+			evt, err := NewEvent(args)
+			qt.Assert(t, err, qt.IsNil)
+			// Safety net in case one of the assertions below bails out early.
+			defer evt.Close()
+
+			_, err = NewEvent(args)
+			qt.Assert(t, err, qt.ErrorIs, os.ErrExist,
+				qt.Commentf("expected consecutive event creation to contain os.ErrExist"))
+
+			qt.Assert(t, evt.Close(), qt.IsNil)
+			qt.Assert(t, evt.Close(), qt.ErrorIs, os.ErrClosed)
+		})
+	}
+}
@@ -0,0 +1,86 @@
+package tracefs
+
+import (
+	"errors"
+	"fmt"
+	"testing"
+
+	qt "github.com/frankban/quicktest"
+)
+
+// TestEventID checks that the ID of a well-known trace event can be read and
+// is non-zero.
+func TestEventID(t *testing.T) {
+	c := qt.New(t)
+
+	eid, err := EventID("syscalls", "sys_enter_mmap")
+	c.Assert(err, qt.IsNil)
+	// qt.Equals compares with ==, which is never true for values of different
+	// types. The zero has to be a uint64 like eid, otherwise the negated
+	// check passes no matter what eid is.
+	c.Assert(eid, qt.Not(qt.Equals), uint64(0))
+}
+
+// TestSanitizePath checks that paths leaving the tracefs mount are refused
+// while relative paths staying inside are accepted.
+func TestSanitizePath(t *testing.T) {
+	if _, err := sanitizeTracefsPath("../escaped"); !errors.Is(err, ErrInvalidInput) {
+		t.Errorf("expected error %s, got: %s", ErrInvalidInput, err)
+	}
+
+	if _, err := sanitizeTracefsPath("./not/escaped"); err != nil {
+		t.Errorf("expected no error, got: %s", err)
+	}
+}
+
+// TestValidIdentifier runs validIdentifier against accepted and rejected
+// inputs.
+func TestValidIdentifier(t *testing.T) {
+	type testCase struct {
+		name string
+		in   string
+		fail bool
+	}
+
+	cases := []testCase{
+		{"empty string", "", true},
+		{"leading number", "1test", true},
+		{"underscore first", "__x64_syscall", false},
+		{"contains number", "bpf_trace_run1", false},
+		{"underscore", "_", false},
+		{"contains dash", "-EINVAL", true},
+		{"contains number", "all0wed", false},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			outcome, wantValid := "fail", !tc.fail
+			if wantValid {
+				outcome = "pass"
+			}
+
+			if got := validIdentifier(tc.in); got != wantValid {
+				t.Errorf("expected string '%s' to %s valid ID check", tc.in, outcome)
+			}
+		})
+	}
+}
+
+// TestSanitizeIdentifier checks that runs of invalid characters collapse
+// into a single underscore.
+func TestSanitizeIdentifier(t *testing.T) {
+	type testCase struct {
+		symbol   string
+		expected string
+	}
+
+	cases := []testCase{
+		{"readline", "readline"},
+		{"main.Func123", "main_Func123"},
+		{"a.....a", "a_a"},
+		{"./;'{}[]a", "_a"},
+		{"***xx**xx###", "_xx_xx_"},
+		{`@P#r$i%v^3*+t)i&k++--`, "_P_r_i_v_3_t_i_k_"},
+	}
+
+	for idx, tc := range cases {
+		t.Run(fmt.Sprint(idx), func(t *testing.T) {
+			if got := sanitizeIdentifier(tc.symbol); got != tc.expected {
+				t.Errorf("Expected sanitized symbol to be '%s', got '%s'", tc.expected, got)
+			}
+		})
+	}
+}
+
+// TestGetTracefsPath checks that a tracefs mount point can be located on the
+// machine running the test.
+func TestGetTracefsPath(t *testing.T) {
+	_, err := getTracefsPath()
+	qt.Assert(t, err, qt.IsNil)
+}
@@ -0,0 +1,24 @@
+// Code generated by "stringer -type=ProbeType -linecomment"; DO NOT EDIT.
+
+package tracefs
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[Kprobe-0]
+	_ = x[Uprobe-1]
+}
+
+const _ProbeType_name = "kprobeuprobe"
+
+var _ProbeType_index = [...]uint8{0, 6, 12}
+
+func (i ProbeType) String() string {
+	if i >= ProbeType(len(_ProbeType_index)-1) {
+		return "ProbeType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _ProbeType_name[_ProbeType_index[i]:_ProbeType_index[i+1]]
+}
@@ -0,0 +1,16 @@
+package tracefs
+
+import "fmt"
+
+// UprobeToken creates the PATH:OFFSET(REF_CTR_OFFSET) token for the tracefs api.
+//
+// Offsets are rendered in hexadecimal with a 0x prefix. The part in
+// parentheses is left out if args.RefCtrOffset is zero.
+func UprobeToken(args ProbeArgs) string {
+	if args.RefCtrOffset == 0 {
+		return fmt.Sprintf("%s:%#x", args.Path, args.Offset)
+	}
+
+	// This is not documented in Documentation/trace/uprobetracer.txt.
+	// elixir.bootlin.com/linux/v5.15-rc7/source/kernel/trace/trace.c#L5564
+	return fmt.Sprintf("%s:%#x(%#x)", args.Path, args.Offset, args.RefCtrOffset)
+}
@@ -0,0 +1,29 @@
+package tracefs
+
+import (
+	"fmt"
+	"testing"
+)
+
+// TestUprobeToken checks the PATH:OFFSET(REF_CTR_OFFSET) rendering for
+// several offsets, with and without a reference counter offset.
+func TestUprobeToken(t *testing.T) {
+	type testCase struct {
+		args     ProbeArgs
+		expected string
+	}
+
+	cases := []testCase{
+		{ProbeArgs{Path: "/bin/bash"}, "/bin/bash:0x0"},
+		{ProbeArgs{Path: "/bin/bash", Offset: 1}, "/bin/bash:0x1"},
+		{ProbeArgs{Path: "/bin/bash", Offset: 65535}, "/bin/bash:0xffff"},
+		{ProbeArgs{Path: "/bin/bash", Offset: 65536}, "/bin/bash:0x10000"},
+		{ProbeArgs{Path: "/bin/bash", Offset: 1, RefCtrOffset: 1}, "/bin/bash:0x1(0x1)"},
+		{ProbeArgs{Path: "/bin/bash", Offset: 1, RefCtrOffset: 65535}, "/bin/bash:0x1(0xffff)"},
+	}
+
+	for idx, tc := range cases {
+		t.Run(fmt.Sprint(idx), func(t *testing.T) {
+			if got := UprobeToken(tc.args); got != tc.expected {
+				t.Errorf("Expected path:offset to be '%s', got '%s'", tc.expected, got)
+			}
+		})
+	}
+}
@@ -0,0 +1,11 @@
+// Package unix re-exports Linux specific parts of golang.org/x/sys/unix.
+//
+// It avoids breaking compilation on other operating systems by providing stubs as follows:
+//   - Invoking a function always returns an error.
+//   - Errnos have distinct, non-zero values.
+//   - Constants have distinct but meaningless values.
+//   - Types use the same names for members, but may or may not follow the
+//     Linux layout.
+package unix
+
+// Note: please don't add any custom API to this package. Use internal/sys instead.
@@ -0,0 +1,202 @@
+//go:build linux
+
+package unix
+
+import (
+	"syscall"
+
+	linux "golang.org/x/sys/unix"
+)
+
+const ( // values come straight from golang.org/x/sys/unix (imported as linux)
+	ENOENT     = linux.ENOENT
+	EEXIST     = linux.EEXIST
+	EAGAIN     = linux.EAGAIN
+	ENOSPC     = linux.ENOSPC
+	EINVAL     = linux.EINVAL
+	EPOLLIN    = linux.EPOLLIN // epoll event flag rather than an errno
+	EINTR      = linux.EINTR
+	EPERM      = linux.EPERM
+	ESRCH      = linux.ESRCH
+	ENODEV     = linux.ENODEV
+	EBADF      = linux.EBADF
+	E2BIG      = linux.E2BIG
+	EFAULT     = linux.EFAULT
+	EACCES     = linux.EACCES
+	EILSEQ     = linux.EILSEQ
+	EOPNOTSUPP = linux.EOPNOTSUPP
+)
+
+const ( // every constant is re-exported unchanged from golang.org/x/sys/unix (imported as linux)
+	BPF_F_NO_PREALLOC         = linux.BPF_F_NO_PREALLOC
+	BPF_F_NUMA_NODE           = linux.BPF_F_NUMA_NODE
+	BPF_F_RDONLY              = linux.BPF_F_RDONLY
+	BPF_F_WRONLY              = linux.BPF_F_WRONLY
+	BPF_F_RDONLY_PROG         = linux.BPF_F_RDONLY_PROG
+	BPF_F_WRONLY_PROG         = linux.BPF_F_WRONLY_PROG
+	BPF_F_SLEEPABLE           = linux.BPF_F_SLEEPABLE
+	BPF_F_XDP_HAS_FRAGS       = linux.BPF_F_XDP_HAS_FRAGS
+	BPF_F_MMAPABLE            = linux.BPF_F_MMAPABLE
+	BPF_F_INNER_MAP           = linux.BPF_F_INNER_MAP
+	BPF_F_KPROBE_MULTI_RETURN = linux.BPF_F_KPROBE_MULTI_RETURN
+	BPF_OBJ_NAME_LEN          = linux.BPF_OBJ_NAME_LEN
+	BPF_TAG_SIZE              = linux.BPF_TAG_SIZE
+	BPF_RINGBUF_BUSY_BIT      = linux.BPF_RINGBUF_BUSY_BIT
+	BPF_RINGBUF_DISCARD_BIT   = linux.BPF_RINGBUF_DISCARD_BIT
+	BPF_RINGBUF_HDR_SZ        = linux.BPF_RINGBUF_HDR_SZ
+	SYS_BPF                   = linux.SYS_BPF
+	F_DUPFD_CLOEXEC           = linux.F_DUPFD_CLOEXEC
+	EPOLL_CTL_ADD             = linux.EPOLL_CTL_ADD
+	EPOLL_CLOEXEC             = linux.EPOLL_CLOEXEC
+	O_CLOEXEC                 = linux.O_CLOEXEC
+	O_NONBLOCK                = linux.O_NONBLOCK
+	PROT_NONE                 = linux.PROT_NONE
+	PROT_READ                 = linux.PROT_READ
+	PROT_WRITE                = linux.PROT_WRITE
+	MAP_ANON                  = linux.MAP_ANON
+	MAP_SHARED                = linux.MAP_SHARED
+	MAP_PRIVATE               = linux.MAP_PRIVATE
+	PERF_ATTR_SIZE_VER1       = linux.PERF_ATTR_SIZE_VER1
+	PERF_TYPE_SOFTWARE        = linux.PERF_TYPE_SOFTWARE
+	PERF_TYPE_TRACEPOINT      = linux.PERF_TYPE_TRACEPOINT
+	PERF_COUNT_SW_BPF_OUTPUT  = linux.PERF_COUNT_SW_BPF_OUTPUT
+	PERF_EVENT_IOC_DISABLE    = linux.PERF_EVENT_IOC_DISABLE
+	PERF_EVENT_IOC_ENABLE     = linux.PERF_EVENT_IOC_ENABLE
+	PERF_EVENT_IOC_SET_BPF    = linux.PERF_EVENT_IOC_SET_BPF
+	PerfBitWatermark          = linux.PerfBitWatermark
+	PerfBitWriteBackward      = linux.PerfBitWriteBackward
+	PERF_SAMPLE_RAW           = linux.PERF_SAMPLE_RAW
+	PERF_FLAG_FD_CLOEXEC      = linux.PERF_FLAG_FD_CLOEXEC
+	RLIM_INFINITY             = linux.RLIM_INFINITY
+	RLIMIT_MEMLOCK            = linux.RLIMIT_MEMLOCK
+	BPF_STATS_RUN_TIME        = linux.BPF_STATS_RUN_TIME
+	PERF_RECORD_LOST          = linux.PERF_RECORD_LOST
+	PERF_RECORD_SAMPLE        = linux.PERF_RECORD_SAMPLE
+	AT_FDCWD                  = linux.AT_FDCWD
+	RENAME_NOREPLACE          = linux.RENAME_NOREPLACE
+	SO_ATTACH_BPF             = linux.SO_ATTACH_BPF
+	SO_DETACH_BPF             = linux.SO_DETACH_BPF
+	SOL_SOCKET                = linux.SOL_SOCKET
+	SIGPROF                   = linux.SIGPROF
+	SIG_BLOCK                 = linux.SIG_BLOCK
+	SIG_UNBLOCK               = linux.SIG_UNBLOCK
+	EM_NONE                   = linux.EM_NONE
+	EM_BPF                    = linux.EM_BPF
+	BPF_FS_MAGIC              = linux.BPF_FS_MAGIC
+	TRACEFS_MAGIC             = linux.TRACEFS_MAGIC
+	DEBUGFS_MAGIC             = linux.DEBUGFS_MAGIC
+)
+
+type Statfs_t = linux.Statfs_t // type aliases (note the '='), so values stay interchangeable with golang.org/x/sys/unix
+type Stat_t = linux.Stat_t
+type Rlimit = linux.Rlimit
+type Signal = linux.Signal
+type Sigset_t = linux.Sigset_t
+type PerfEventMmapPage = linux.PerfEventMmapPage
+type EpollEvent = linux.EpollEvent
+type PerfEventAttr = linux.PerfEventAttr
+type Utsname = linux.Utsname
+
+func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) { // like every wrapper below, forwards its arguments unchanged to golang.org/x/sys/unix; err is an Errno, not an error
+	return linux.Syscall(trap, a1, a2, a3)
+}
+
+func PthreadSigmask(how int, set, oldset *Sigset_t) error {
+	return linux.PthreadSigmask(how, set, oldset)
+}
+
+func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
+	return linux.FcntlInt(fd, cmd, arg)
+}
+
+func IoctlSetInt(fd int, req uint, value int) error {
+	return linux.IoctlSetInt(fd, req, value)
+}
+
+func Statfs(path string, buf *Statfs_t) (err error) {
+	return linux.Statfs(path, buf)
+}
+
+func Close(fd int) (err error) {
+	return linux.Close(fd)
+}
+
+func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
+	return linux.EpollWait(epfd, events, msec)
+}
+
+func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
+	return linux.EpollCtl(epfd, op, fd, event)
+}
+
+func Eventfd(initval uint, flags int) (fd int, err error) {
+	return linux.Eventfd(initval, flags)
+}
+
+func Write(fd int, p []byte) (n int, err error) {
+	return linux.Write(fd, p)
+}
+
+func EpollCreate1(flag int) (fd int, err error) {
+	return linux.EpollCreate1(flag)
+}
+
+func SetNonblock(fd int, nonblocking bool) (err error) {
+	return linux.SetNonblock(fd, nonblocking)
+}
+
+func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
+	return linux.Mmap(fd, offset, length, prot, flags)
+}
+
+func Munmap(b []byte) (err error) {
+	return linux.Munmap(b)
+}
+
+func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
+	return linux.PerfEventOpen(attr, pid, cpu, groupFd, flags)
+}
+
+func Uname(buf *Utsname) (err error) {
+	return linux.Uname(buf)
+}
+
+func Getpid() int {
+	return linux.Getpid()
+}
+
+func Gettid() int {
+	return linux.Gettid()
+}
+
+func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) { // sig is declared as syscall.Signal rather than this package's Signal
+	return linux.Tgkill(tgid, tid, sig)
+}
+
+func BytePtrFromString(s string) (*byte, error) {
+	return linux.BytePtrFromString(s)
+}
+
+func ByteSliceToString(s []byte) string {
+	return linux.ByteSliceToString(s)
+}
+
+func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error {
+	return linux.Renameat2(olddirfd, oldpath, newdirfd, newpath, flags)
+}
+
+func Prlimit(pid, resource int, new, old *Rlimit) error {
+	return linux.Prlimit(pid, resource, new, old)
+}
+
+func Open(path string, mode int, perm uint32) (int, error) {
+	return linux.Open(path, mode, perm)
+}
+
+func Fstat(fd int, stat *Stat_t) error {
+	return linux.Fstat(fd, stat)
+}
+
+func SetsockoptInt(fd, level, opt, value int) error {
+	return linux.SetsockoptInt(fd, level, opt, value)
+}
@@ -0,0 +1,294 @@
+//go:build !linux
+
+package unix
+
+import (
+	"fmt"
+	"runtime"
+	"syscall"
+)
+
+var errNonLinux = fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH) // returned by the stubs whose result type is error
+
+// Errnos are distinct and non-zero: iota+1 numbers them from 1 in declaration order, so they are placeholders rather than Linux errno numbers.
+const (
+	ENOENT syscall.Errno = iota + 1
+	EEXIST
+	EAGAIN
+	ENOSPC
+	EINVAL
+	EINTR
+	EPERM
+	ESRCH
+	ENODEV
+	EBADF
+	E2BIG
+	EFAULT
+	EACCES
+	EILSEQ
+	EOPNOTSUPP
+)
+
+// Constants are distinct to avoid breaking switch statements; the iota-assigned values (starting at 0) are placeholders with no other meaning.
+const (
+	BPF_F_NO_PREALLOC = iota
+	BPF_F_NUMA_NODE
+	BPF_F_RDONLY
+	BPF_F_WRONLY
+	BPF_F_RDONLY_PROG
+	BPF_F_WRONLY_PROG
+	BPF_F_SLEEPABLE
+	BPF_F_MMAPABLE
+	BPF_F_INNER_MAP
+	BPF_F_KPROBE_MULTI_RETURN
+	BPF_F_XDP_HAS_FRAGS
+	BPF_OBJ_NAME_LEN
+	BPF_TAG_SIZE
+	BPF_RINGBUF_BUSY_BIT
+	BPF_RINGBUF_DISCARD_BIT
+	BPF_RINGBUF_HDR_SZ
+	SYS_BPF
+	F_DUPFD_CLOEXEC
+	EPOLLIN
+	EPOLL_CTL_ADD
+	EPOLL_CLOEXEC
+	O_CLOEXEC
+	O_NONBLOCK
+	PROT_NONE
+	PROT_READ
+	PROT_WRITE
+	MAP_ANON
+	MAP_SHARED
+	MAP_PRIVATE
+	PERF_ATTR_SIZE_VER1
+	PERF_TYPE_SOFTWARE
+	PERF_TYPE_TRACEPOINT
+	PERF_COUNT_SW_BPF_OUTPUT
+	PERF_EVENT_IOC_DISABLE
+	PERF_EVENT_IOC_ENABLE
+	PERF_EVENT_IOC_SET_BPF
+	PerfBitWatermark
+	PerfBitWriteBackward
+	PERF_SAMPLE_RAW
+	PERF_FLAG_FD_CLOEXEC
+	RLIM_INFINITY
+	RLIMIT_MEMLOCK
+	BPF_STATS_RUN_TIME
+	PERF_RECORD_LOST
+	PERF_RECORD_SAMPLE
+	AT_FDCWD
+	RENAME_NOREPLACE
+	SO_ATTACH_BPF
+	SO_DETACH_BPF
+	SOL_SOCKET
+	SIGPROF
+	SIG_BLOCK
+	SIG_UNBLOCK
+	EM_NONE
+	EM_BPF
+	BPF_FS_MAGIC
+	TRACEFS_MAGIC
+	DEBUGFS_MAGIC
+)
+
+type Statfs_t struct { // non-Linux stand-in; per the package doc, member names are kept but the layout may differ from Linux
+	Type    int64
+	Bsize   int64
+	Blocks  uint64
+	Bfree   uint64
+	Bavail  uint64
+	Files   uint64
+	Ffree   uint64
+	Fsid    [2]int32
+	Namelen int64
+	Frsize  int64
+	Flags   int64
+	Spare   [4]int64
+}
+
+type Stat_t struct { // non-Linux stand-in; per the package doc, member names are kept but the layout may differ from Linux
+	Dev     uint64
+	Ino     uint64
+	Nlink   uint64
+	Mode    uint32
+	Uid     uint32
+	Gid     uint32
+	_       int32 // unnamed field, inaccessible to callers
+	Rdev    uint64
+	Size    int64
+	Blksize int64
+	Blocks  int64
+}
+
+type Rlimit struct { // non-Linux stand-in taken by the Prlimit stub
+	Cur uint64
+	Max uint64
+}
+
+type Signal int // plain int here; note that the Tgkill stub takes syscall.Signal instead
+
+type Sigset_t struct { // non-Linux stand-in taken by the PthreadSigmask stub
+	Val [4]uint64
+}
+
+func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
+	return 0, 0, syscall.ENOTSUP // an Errno is required by the result type, so errNonLinux cannot be used here
+}
+
+func PthreadSigmask(how int, set, oldset *Sigset_t) error {
+	return errNonLinux
+}
+
+func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
+	return -1, errNonLinux
+}
+
+func IoctlSetInt(fd int, req uint, value int) error {
+	return errNonLinux
+}
+
+func Statfs(path string, buf *Statfs_t) error {
+	return errNonLinux // buf is left untouched
+}
+
+func Close(fd int) (err error) {
+	return errNonLinux // fd is not closed
+}
+
+type EpollEvent struct { // non-Linux stand-in used by the EpollWait and EpollCtl stubs
+	Events uint32
+	Fd     int32
+	Pad    int32
+}
+
+// EpollWait is the non-Linux stub: it reports zero events and errNonLinux.
+func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
+	return 0, errNonLinux
+}
+
+// EpollCtl is the non-Linux stub: it always fails with errNonLinux.
+func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
+	return errNonLinux
+}
+
+// Eventfd is the non-Linux stub: it always fails with errNonLinux.
+//
+// The returned descriptor is -1, matching the Open and FcntlInt stubs, so a
+// caller that ignores err cannot mistake the result for descriptor 0.
+func Eventfd(initval uint, flags int) (fd int, err error) {
+	return -1, errNonLinux
+}
+
+// Write is the non-Linux stub: it writes nothing and reports errNonLinux.
+func Write(fd int, p []byte) (n int, err error) {
+	return 0, errNonLinux
+}
+
+// EpollCreate1 is the non-Linux stub: it always fails with errNonLinux.
+//
+// The returned descriptor is -1, matching the Open and FcntlInt stubs, so a
+// caller that ignores err cannot mistake the result for descriptor 0.
+func EpollCreate1(flag int) (fd int, err error) {
+	return -1, errNonLinux
+}
+
+type PerfEventMmapPage struct { // non-Linux stand-in; per the package doc, member names are kept but the layout may differ from Linux
+	Version        uint32
+	Compat_version uint32
+	Lock           uint32
+	Index          uint32
+	Offset         int64
+	Time_enabled   uint64
+	Time_running   uint64
+	Capabilities   uint64
+	Pmc_width      uint16
+	Time_shift     uint16
+	Time_mult      uint32
+	Time_offset    uint64
+	Time_zero      uint64
+	Size           uint32
+
+	Data_head   uint64
+	Data_tail   uint64
+	Data_offset uint64
+	Data_size   uint64
+	Aux_head    uint64
+	Aux_tail    uint64
+	Aux_offset  uint64
+	Aux_size    uint64
+}
+
+func SetNonblock(fd int, nonblocking bool) (err error) {
+	return errNonLinux
+}
+
+func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
+	return []byte{}, errNonLinux // data is an empty, non-nil slice; nothing is mapped
+}
+
+func Munmap(b []byte) (err error) {
+	return errNonLinux
+}
+
+type PerfEventAttr struct { // non-Linux stand-in; per the package doc, member names are kept but the layout may differ from Linux
+	Type               uint32
+	Size               uint32
+	Config             uint64
+	Sample             uint64
+	Sample_type        uint64
+	Read_format        uint64
+	Bits               uint64
+	Wakeup             uint32
+	Bp_type            uint32
+	Ext1               uint64
+	Ext2               uint64
+	Branch_sample_type uint64
+	Sample_regs_user   uint64
+	Sample_stack_user  uint32
+	Clockid            int32
+	Sample_regs_intr   uint64
+	Aux_watermark      uint32
+	Sample_max_stack   uint16
+}
+
+// PerfEventOpen is the non-Linux stub: it always fails with errNonLinux.
+//
+// The returned descriptor is -1, matching the Open and FcntlInt stubs, so a
+// caller that ignores err cannot mistake the result for descriptor 0.
+func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
+	return -1, errNonLinux
+}
+
+type Utsname struct { // non-Linux stand-in that declares only the Release and Version members
+	Release [65]byte
+	Version [65]byte
+}
+
+func Uname(buf *Utsname) (err error) {
+	return errNonLinux // buf is left untouched
+}
+
+func Getpid() int {
+	return -1 // no error result is available, so the stub returns -1
+}
+
+func Gettid() int {
+	return -1 // no error result is available, so the stub returns -1
+}
+
+func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) {
+	return errNonLinux
+}
+
+func BytePtrFromString(s string) (*byte, error) {
+	return nil, errNonLinux
+}
+
+func ByteSliceToString(s []byte) string {
+	return "" // s is ignored; the result is always empty
+}
+
+func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error {
+	return errNonLinux
+}
+
+func Prlimit(pid, resource int, new, old *Rlimit) error {
+	return errNonLinux
+}
+
+func Open(path string, mode int, perm uint32) (int, error) {
+	return -1, errNonLinux
+}
+
+func Fstat(fd int, stat *Stat_t) error {
+	return errNonLinux
+}
+
+func SetsockoptInt(fd, level, opt, value int) error {
+	return errNonLinux
+}
@@ -0,0 +1,153 @@
+package internal
+
+import (
+	"debug/elf"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+	"math"
+	"os"
+
+	"github.com/cilium/ebpf/internal/unix"
+)
+
+var (
+	errAuxvNoVDSO = errors.New("no vdso address found in auxv") // returned by vdsoMemoryAddress when AT_NULL is reached before AT_SYSINFO_EHDR
+)
+
+// vdsoVersion reads the LINUX_VERSION_CODE out of the vDSO image that is
+// mapped into the current process.
+func vdsoVersion() (uint32, error) {
+	// The vDSO address lives in the auxiliary vector, which is normally handed
+	// straight to the process. Go does not expose it, so read it from procfs.
+	// https://man7.org/linux/man-pages/man3/getauxval.3.html
+	auxv, err := os.Open("/proc/self/auxv")
+	if err != nil {
+		if errors.Is(err, unix.EACCES) {
+			return 0, fmt.Errorf("opening auxv: %w (process may not be dumpable due to file capabilities)", err)
+		}
+		return 0, fmt.Errorf("opening auxv: %w", err)
+	}
+	defer auxv.Close()
+
+	addr, err := vdsoMemoryAddress(auxv)
+	if err != nil {
+		return 0, fmt.Errorf("finding vDSO memory address: %w", err)
+	}
+
+	// Go through /proc/self/mem instead of resorting to unsafe.Pointer tricks.
+	procMem, err := os.Open("/proc/self/mem")
+	if err != nil {
+		return 0, fmt.Errorf("opening mem: %w", err)
+	}
+	defer procMem.Close()
+
+	// Present process memory, starting at the vDSO address, as an ELF image.
+	image := io.NewSectionReader(procMem, int64(addr), math.MaxInt64)
+	code, err := vdsoLinuxVersionCode(image)
+	if err != nil {
+		return 0, fmt.Errorf("reading linux version code: %w", err)
+	}
+
+	return code, nil
+}
+
+// vdsoMemoryAddress scans an auxv blob read from r and returns the address of
+// the vDSO mapped into the current process image.
+//
+// errAuxvNoVDSO is returned when the end of the vector is reached without
+// finding the vDSO entry.
+func vdsoMemoryAddress(r io.Reader) (uint64, error) {
+	const (
+		_AT_NULL         = 0  // End of vector
+		_AT_SYSINFO_EHDR = 33 // Offset to vDSO blob in process image
+	)
+
+	// Each entry is a tag/value pair, decoded here as two 64-bit words.
+	var entry struct{ Tag, Val uint64 }
+	for {
+		if err := binary.Read(r, NativeEndian, &entry); err != nil {
+			return 0, fmt.Errorf("reading auxv entry: %w", err)
+		}
+
+		// _AT_NULL is always the final pair and can be treated like EOF.
+		if entry.Tag == _AT_NULL {
+			return 0, errAuxvNoVDSO
+		}
+
+		// Skip everything except the pair holding the address of the page
+		// that contains the virtual Dynamic Shared Object (vDSO).
+		if entry.Tag != _AT_SYSINFO_EHDR {
+			continue
+		}
+
+		if entry.Val == 0 {
+			return 0, fmt.Errorf("invalid vDSO address in auxv")
+		}
+		return entry.Val, nil
+	}
+}
+
+// elfNoteHeader is the fixed-size header read in front of every ELF note; format described at https://www.man7.org/linux/man-pages/man5/elf.5.html in section 'Notes (Nhdr)'
+type elfNoteHeader struct {
+	NameSize int32 // byte length of the name that follows the header (read padded to 4 bytes)
+	DescSize int32 // byte length of the descriptor that follows the name (skipped padded to 4 bytes)
+	Type     int32 // note type; vdsoLinuxVersionCode matches on 0
+}
+
+// vdsoLinuxVersionCode returns the LINUX_VERSION_CODE embedded in
+// the ELF notes section of the binary provided by the reader.
+//
+// Every SHT_NOTE section is scanned in order. The first note named "Linux"
+// with type 0 and a 4-byte descriptor supplies the result. An error is
+// returned if the ELF cannot be parsed, has no note section, contains a note
+// that cannot be read in full, or has no matching note.
+func vdsoLinuxVersionCode(r io.ReaderAt) (uint32, error) {
+	hdr, err := NewSafeELFFile(r)
+	if err != nil {
+		return 0, fmt.Errorf("reading vDSO ELF: %w", err)
+	}
+
+	sections := hdr.SectionsByType(elf.SHT_NOTE)
+	if len(sections) == 0 {
+		return 0, fmt.Errorf("no note section found in vDSO ELF")
+	}
+
+	for _, sec := range sections {
+		sr := sec.Open()
+		var n elfNoteHeader
+
+		// Read notes until we find one named 'Linux'.
+		for {
+			if err := binary.Read(sr, hdr.ByteOrder, &n); err != nil {
+				if errors.Is(err, io.EOF) {
+					// We looked at all the notes in this section
+					break
+				}
+				return 0, fmt.Errorf("reading note header: %w", err)
+			}
+
+			// If a note name is defined, it follows the note header.
+			var name string
+			if n.NameSize > 0 {
+				// Read the note name, aligned to 4 bytes. The name is raw
+				// bytes, so no byte-order aware decoding is needed.
+				buf := make([]byte, Align(n.NameSize, 4))
+				if _, err := io.ReadFull(sr, buf); err != nil {
+					return 0, fmt.Errorf("reading note name: %w", err)
+				}
+
+				// Read nul-terminated string.
+				name = unix.ByteSliceToString(buf[:n.NameSize])
+			}
+
+			// If a note descriptor is defined, it follows the name.
+			// It is possible for a note to have a descriptor but not a name.
+			if n.DescSize > 0 {
+				// LINUX_VERSION_CODE is a uint32 value.
+				if name == "Linux" && n.DescSize == 4 && n.Type == 0 {
+					var version uint32
+					if err := binary.Read(sr, hdr.ByteOrder, &version); err != nil {
+						return 0, fmt.Errorf("reading note descriptor: %w", err)
+					}
+					return version, nil
+				}
+
+				// Discard the note descriptor if it exists but we're not interested in it.
+				if _, err := io.CopyN(io.Discard, sr, int64(Align(n.DescSize, 4))); err != nil {
+					return 0, fmt.Errorf("discarding note descriptor: %w", err)
+				}
+			}
+		}
+	}
+
+	return 0, fmt.Errorf("no Linux note in ELF")
+}
@@ -0,0 +1,74 @@
+package internal
+
+import (
+	"errors"
+	"os"
+	"testing"
+)
+
+func TestAuxvVDSOMemoryAddress(t *testing.T) {
+	av, err := os.Open("../testdata/auxv.bin")
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Cleanup(func() { av.Close() })
+
+	addr, err := vdsoMemoryAddress(av)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	expected := uint64(0x7ffd377e5000)
+	if addr != expected {
+		t.Errorf("Expected vDSO memory address %x, got %x", expected, addr)
+	}
+}
+
+// TestAuxvNoVDSO checks that errAuxvNoVDSO is reported for an auxv blob
+// without a vDSO entry.
+func TestAuxvNoVDSO(t *testing.T) {
+	// The fixture is a copy of auxv.bin with the vDSO pointer removed.
+	f, err := os.Open("../testdata/auxv_no_vdso.bin")
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Cleanup(func() { f.Close() })
+
+	_, err = vdsoMemoryAddress(f)
+	if !errors.Is(err, errAuxvNoVDSO) {
+		t.Fatalf("expected error '%v', got: %v", errAuxvNoVDSO, err)
+	}
+}
+
+// TestLinuxVersionCodeEmbedded checks the version code extracted from vDSO
+// images stored as test fixtures.
+func TestLinuxVersionCodeEmbedded(t *testing.T) {
+	cases := []struct {
+		file    string
+		version uint32
+	}{
+		// 5.4.124
+		{file: "../testdata/vdso.bin", version: 328828},
+		// Container Optimized OS v85 with a 5.4.x kernel
+		{file: "../testdata/vdso_multiple_notes.bin", version: 328875},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.file, func(t *testing.T) {
+			f, err := os.Open(tc.file)
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer f.Close()
+
+			got, err := vdsoLinuxVersionCode(f)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			if got != tc.version {
+				t.Errorf("Expected version code %d, got %d", tc.version, got)
+			}
+		})
+	}
+}
@@ -0,0 +1,106 @@
+package internal
+
+import (
+	"fmt"
+
+	"github.com/cilium/ebpf/internal/unix"
+)
+
+const (
+	// Version constant used in ELF binaries indicating that the loader needs to
+	// substitute the eBPF program's version with the value of the kernel's
+	// KERNEL_VERSION compile-time macro. Used for compatibility with BCC, gobpf
+	// and RedSift.
+	MagicKernelVersion = 0xFFFFFFFE // untyped constant, one below the maximum uint32
+)
+
+// Version is a Major.Minor.Patch triple.
+type Version [3]uint16
+
+// NewVersion parses a string that starts with "Major.Minor.Patch".
+//
+// Patch may be left out, in which case it is zero. An error is returned when
+// fewer than two components can be parsed.
+func NewVersion(ver string) (Version, error) {
+	var v Version
+	// The scan error is ignored on purpose: a missing patch component makes
+	// Sscanf fail, so only the number of parsed fields is checked.
+	parsed, _ := fmt.Sscanf(ver, "%d.%d.%d", &v[0], &v[1], &v[2])
+	if parsed < 2 {
+		return Version{}, fmt.Errorf("invalid version: %s", ver)
+	}
+	return v, nil
+}
+
+// NewVersionFromCode unpacks a LINUX_VERSION_CODE: major, minor and patch are
+// taken from bits 16-23, 8-15 and 0-7 respectively.
+func NewVersionFromCode(code uint32) Version {
+	major := (code >> 16) & 0xff
+	minor := (code >> 8) & 0xff
+	patch := code & 0xff
+	return Version{uint16(major), uint16(minor), uint16(patch)}
+}
+
+// String formats the version as "vMajor.Minor.Patch", leaving the patch
+// component out when it is zero.
+func (v Version) String() string {
+	major, minor, patch := v[0], v[1], v[2]
+	if patch != 0 {
+		return fmt.Sprintf("v%d.%d.%d", major, minor, patch)
+	}
+	return fmt.Sprintf("v%d.%d", major, minor)
+}
+
+// Less reports whether v sorts strictly before other, comparing major, then
+// minor, then patch.
+func (v Version) Less(other Version) bool {
+	for i := range v {
+		if v[i] != other[i] {
+			return v[i] < other[i]
+		}
+	}
+	return false
+}
+
+// Unspecified reports whether every component of the version is zero.
+func (v Version) Unspecified() bool {
+	return v == (Version{})
+}
+
+// Kernel implements the kernel's KERNEL_VERSION macro from linux/version.h.
+// It packs the version and patch level into a single value.
+func (v Version) Kernel() uint32 {
+	// Major and minor are truncated to 8 bits so that they cannot spill over
+	// into each other.
+	major := uint32(v[0]) & 0xff
+	minor := uint32(v[1]) & 0xff
+
+	// Kernels 4.4 and 4.9 have their SUBLEVEL clamped to 255 to avoid
+	// overflowing into PATCHLEVEL.
+	// See kernel commit 9b82f13e7ef3 ("kbuild: clamp SUBLEVEL to 255").
+	sublevel := uint32(v[2])
+	if sublevel > 0xff {
+		sublevel = 0xff
+	}
+
+	return major<<16 | minor<<8 | sublevel
+}
+
+// KernelVersion returns the version of the currently running kernel, as reported by detectKernelVersion and wrapped in Memoize (presumably so that detection runs only once; confirm against Memoize).
+var KernelVersion = Memoize(func() (Version, error) {
+	return detectKernelVersion()
+})
+
+// detectKernelVersion returns the version of the running kernel.
+func detectKernelVersion() (Version, error) {
+	vc, err := vdsoVersion()
+	if err != nil {
+		return Version{}, err
+	}
+	return NewVersionFromCode(vc), nil
+}
+
+// KernelRelease returns the release string of the running kernel, as reported
+// by uname.
+//
+// Its format depends on the Linux distribution and, by convention,
+// corresponds to directory names in /lib/modules. Some examples are
+// 5.15.17-1-lts and 4.19.0-16-amd64.
+func KernelRelease() (string, error) {
+	var uts unix.Utsname
+	err := unix.Uname(&uts)
+	if err != nil {
+		return "", fmt.Errorf("uname failed: %w", err)
+	}
+
+	release := unix.ByteSliceToString(uts.Release[:])
+	return release, nil
+}
@@ -0,0 +1,97 @@
+package internal
+
+import (
+	"os"
+	"testing"
+)
+
+// TestVersion checks NewVersion parsing with and without a patch component,
+// and the ordering implemented by Version.Less.
+func TestVersion(t *testing.T) {
+	a, err := NewVersion("1.2")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	b, err := NewVersion("2.2.1")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if !a.Less(b) {
+		t.Error("A should be less than B")
+	}
+
+	if b.Less(a) {
+		t.Error("B shouldn't be less than A")
+	}
+
+	// The failure messages name the versions actually compared: a is 1.2.
+	v200 := Version{2, 0, 0}
+	if !a.Less(v200) {
+		t.Error("1.2 should be less than 2.0.0")
+	}
+
+	if v200.Less(a) {
+		t.Error("2.0.0 should not be less than 1.2")
+	}
+}
+
+// TestKernelVersion checks the packed value produced by Version.Kernel for an
+// overflowing version and for a known good one.
+func TestKernelVersion(t *testing.T) {
+	// Kernels 4.4 and 4.9 have a SUBLEVEL of over 255 and clamp it to 255.
+	// In our implementation, the other version segments are truncated.
+	if v, want := (Version{256, 256, 256}), uint32(255); v.Kernel() != want {
+		t.Errorf("256.256.256 should result in a kernel version of %d, got: %d", want, v.Kernel())
+	}
+
+	// Known good version.
+	if v, want := (Version{4, 9, 128}), uint32(264576); v.Kernel() != want {
+		t.Errorf("4.9.128 should result in a kernel version of %d, got: %d", want, v.Kernel())
+	}
+}
+
+// TestCurrentKernelVersion checks that the running kernel's version can be
+// detected and, when KERNEL_VERSION is set in the environment, that the
+// major and minor components match it.
+func TestCurrentKernelVersion(t *testing.T) {
+	got, err := KernelVersion()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	env := os.Getenv("KERNEL_VERSION")
+	if env == "" {
+		return
+	}
+
+	want, err := NewVersion(env)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if want[0] != got[0] || want[1] != got[1] {
+		t.Errorf("expected kernel version %d.%d, got %d.%d", want[0], want[1], got[0], got[1])
+	}
+}
+
+func TestVersionFromCode(t *testing.T) {
+	var tests = []struct {
+		name string
+		code uint32
+		v    Version
+	}{
+		{"0.0.0", 0, Version{0, 0, 0}},
+		{"1.0.0", 0x10000, Version{1, 0, 0}},
+		{"4.4.255", 0x404ff, Version{4, 4, 255}},
+		{"255.255.255", 0xffffff, Version{255, 255, 255}},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			v := NewVersionFromCode(tt.code)
+			if v != tt.v {
+				t.Errorf("unexpected version for code '%d'. got: %v, want: %v", tt.code, v, tt.v)
+			}
+		})
+	}
+}
+
+// TestKernelRelease checks that a non-empty release string is reported.
+func TestKernelRelease(t *testing.T) {
+	release, err := KernelRelease()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(release) == 0 {
+		t.Fatal("unexpected empty kernel release")
+	}
+}