whatcanGOwrong

This commit is contained in:
2024-09-19 21:38:24 -04:00
commit d0ae4d841d
17908 changed files with 4096831 additions and 0 deletions
@@ -0,0 +1,8 @@
package internal
import "golang.org/x/exp/constraints"
// Align rounds 'n' up to the next multiple of 'alignment'.
//
// This holds for non-negative n and positive alignment; a value that is
// already a multiple of 'alignment' is returned unchanged.
func Align[I constraints.Integer](n, alignment I) I {
	// Adding alignment-1 pushes every value that is not already aligned
	// past the next boundary; the integer division then drops the remainder.
	padded := n + alignment - 1
	return padded / alignment * alignment
}
@@ -0,0 +1,31 @@
package internal
import (
"bytes"
"sync"
)
// bytesBufferPool recycles [bytes.Buffer] values handed out by NewBuffer and
// returned via PutBuffer. An empty pool produces a fresh, empty buffer.
var bytesBufferPool = sync.Pool{
	New: func() any { return &bytes.Buffer{} },
}
// NewBuffer takes a [bytes.Buffer] from the pool and re-initialises it with
// buf as its initial contents.
//
// Hand the returned buffer to [PutBuffer] once it is no longer needed.
func NewBuffer(buf []byte) *bytes.Buffer {
	pooled := bytesBufferPool.Get().(*bytes.Buffer)
	// Overwrite the pooled value wholesale so that it is backed by buf and
	// not by whatever it referenced before. Only the allocation of the
	// Buffer struct itself is recycled.
	*pooled = *bytes.NewBuffer(buf)
	return pooled
}
// PutBuffer hands a buffer back to the pool.
//
// The buffer is reset to its zero value first, so the caller's backing slice
// is not kept alive by the pool.
func PutBuffer(buf *bytes.Buffer) {
	*buf = bytes.Buffer{}
	bytesBufferPool.Put(buf)
}
@@ -0,0 +1 @@
gentypes
@@ -0,0 +1,776 @@
// Program gentypes reads a compressed vmlinux .BTF section and generates
// syscall bindings from it.
//
// Output is written to "types.go".
package main
import (
"bytes"
"errors"
"fmt"
"os"
"sort"
"strings"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
)
// syscallRetval selects the shape of the wrapper function that generateTypes
// emits for a BPF command.
type syscallRetval int
const (
// retError makes the generated wrapper return only an error.
retError syscallRetval = iota
// retFd makes the generated wrapper return (*FD, error).
retFd
)
// main runs the generator and exits with status 1 if it fails, after
// printing the error to stderr.
func main() {
	err := run(os.Args[1:])
	if err == nil {
		return
	}

	fmt.Fprintln(os.Stderr, "Error:", err)
	os.Exit(1)
}
// run generates "types.go" in the current directory.
//
// args must contain exactly one element: the path of a compressed vmlinux
// .BTF blob. Any failure to read, parse, generate or write is returned.
func run(args []string) error {
	if len(args) != 1 {
		return fmt.Errorf("expect location of compressed vmlinux .BTF as argument")
	}

	raw, err := internal.ReadAllCompressed(args[0])
	if err != nil {
		return err
	}

	spec, err := btf.LoadSpecFromReader(bytes.NewReader(raw))
	if err != nil {
		return err
	}

	output, err := generateTypes(spec)
	if err != nil {
		return err
	}

	w, err := os.Create("types.go")
	if err != nil {
		return err
	}

	if err := internal.WriteFormatted(output, w); err != nil {
		// The write error is the interesting one; a failure to close
		// adds nothing to it.
		_ = w.Close()
		return err
	}

	// Errors from closing a file that was written to can indicate lost
	// data, so report them instead of dropping them in a defer.
	return w.Close()
}
// generateTypes renders the Go source of the sys package bindings from spec.
//
// The output consists of, in order: a file header, the enums listed in
// 'enums', the structs listed in 'structs', one "<Name>Attr" struct plus a
// wrapper function per entry in 'attrs' (carved out of the bpf_attr union),
// and the type specific parts of bpf_link_info. Each list is sorted by Go type
// name before being emitted. Progress is printed to stdout.
//
// The returned bytes are not formatted.
func generateTypes(spec *btf.Spec) ([]byte, error) {
	// Placeholder BTF types which the formatter maps to hand-written Go
	// types in package sys via gf.Names below.
	objName := &btf.Array{Nelems: 16, Type: &btf.Int{Encoding: btf.Char, Size: 1}}
	linkID := &btf.Int{Size: 4}
	btfID := &btf.Int{Size: 4}
	typeID := &btf.Int{Size: 4}
	pointer := &btf.Int{Size: 8}
	logLevel := &btf.Int{Size: 4}
	mapFlags := &btf.Int{Size: 4}

	gf := &btf.GoFormatter{
		Names: map[btf.Type]string{
			objName:  internal.GoTypeName(sys.ObjName{}),
			linkID:   internal.GoTypeName(sys.LinkID(0)),
			btfID:    internal.GoTypeName(sys.BTFID(0)),
			typeID:   internal.GoTypeName(sys.TypeID(0)),
			pointer:  internal.GoTypeName(sys.Pointer{}),
			logLevel: internal.GoTypeName(sys.LogLevel(0)),
			mapFlags: internal.GoTypeName(sys.MapFlags(0)),
		},
		Identifier: internal.Identifier,
		EnumIdentifier: func(name, element string) string {
			return element
		},
	}

	w := bytes.NewBuffer(nil)
	w.WriteString(`// Code generated by internal/cmd/gentypes; DO NOT EDIT.
package sys
import (
"unsafe"
)
`)

	enums := []struct {
		goType string
		cType  string
	}{
		{"Cmd", "bpf_cmd"},
		{"MapType", "bpf_map_type"},
		{"ProgType", "bpf_prog_type"},
		{"AttachType", "bpf_attach_type"},
		{"LinkType", "bpf_link_type"},
		{"StatsType", "bpf_stats_type"},
		{"SkAction", "sk_action"},
		{"StackBuildIdStatus", "bpf_stack_build_id_status"},
		{"FunctionId", "bpf_func_id"},
		{"AdjRoomMode", "bpf_adj_room_mode"},
		{"HdrStartOff", "bpf_hdr_start_off"},
		{"RetCode", "bpf_ret_code"},
		{"XdpAction", "xdp_action"},
	}

	sort.Slice(enums, func(i, j int) bool {
		return enums[i].goType < enums[j].goType
	})

	enumTypes := make(map[string]btf.Type)
	for _, o := range enums {
		fmt.Println("enum", o.goType)

		var t *btf.Enum
		if err := spec.TypeByName(o.cType, &t); err != nil {
			return nil, err
		}

		// Add the enum as a predeclared type so that generated structs
		// refer to the Go types.
		if name := gf.Names[t]; name != "" {
			return nil, fmt.Errorf("type %q is already declared as %s", o.cType, name)
		}
		gf.Names[t] = o.goType
		enumTypes[o.goType] = t

		decl, err := gf.TypeDeclaration(o.goType, t)
		if err != nil {
			return nil, fmt.Errorf("generate %q: %w", o.goType, err)
		}

		w.WriteString(decl)
		w.WriteRune('\n')
	}

	// Assorted structs
	structs := []struct {
		goType  string
		cType   string
		patches []patch
	}{
		{
			"ProgInfo", "bpf_prog_info",
			[]patch{
				replace(objName, "name"),
				replace(pointer, "xlated_prog_insns"),
				replace(pointer, "map_ids"),
				replace(btfID, "btf_id"),
			},
		},
		{
			"MapInfo", "bpf_map_info",
			[]patch{
				replace(objName, "name"),
				replace(mapFlags, "map_flags"),
				replace(typeID, "btf_vmlinux_value_type_id", "btf_key_type_id", "btf_value_type_id"),
			},
		},
		{
			"BtfInfo", "bpf_btf_info",
			[]patch{
				replace(pointer, "btf", "name"),
				replace(btfID, "id"),
			},
		},
		{
			"LinkInfo", "bpf_link_info",
			[]patch{
				replace(enumTypes["LinkType"], "type"),
				replace(linkID, "id"),
				name(3, "extra"),
				replaceWithBytes("extra"),
			},
		},
		{"FuncInfo", "bpf_func_info", nil},
		{"LineInfo", "bpf_line_info", nil},
		{"XdpMd", "xdp_md", nil},
		{
			"SkLookup", "bpf_sk_lookup",
			[]patch{
				choose(0, "cookie"),
				replaceWithBytes("remote_ip4", "remote_ip6", "local_ip4", "local_ip6"),
			},
		},
	}

	sort.Slice(structs, func(i, j int) bool {
		return structs[i].goType < structs[j].goType
	})

	for _, s := range structs {
		fmt.Println("struct", s.goType)

		var t *btf.Struct
		if err := spec.TypeByName(s.cType, &t); err != nil {
			return nil, err
		}

		if err := outputPatchedStruct(gf, w, s.goType, t, s.patches); err != nil {
			return nil, fmt.Errorf("output %q: %w", s.goType, err)
		}
	}

	// Attrs
	attrs := []struct {
		goType  string
		ret     syscallRetval
		cType   string
		cmd     string
		patches []patch
	}{
		{
			"MapCreate", retFd, "map_create", "BPF_MAP_CREATE",
			[]patch{
				replace(objName, "map_name"),
				replace(enumTypes["MapType"], "map_type"),
				replace(mapFlags, "map_flags"),
				replace(typeID, "btf_vmlinux_value_type_id", "btf_key_type_id", "btf_value_type_id"),
			},
		},
		{
			"MapLookupElem", retError, "map_elem", "BPF_MAP_LOOKUP_ELEM",
			[]patch{choose(2, "value"), replace(pointer, "key", "value")},
		},
		{
			"MapLookupAndDeleteElem", retError, "map_elem", "BPF_MAP_LOOKUP_AND_DELETE_ELEM",
			[]patch{choose(2, "value"), replace(pointer, "key", "value")},
		},
		{
			"MapUpdateElem", retError, "map_elem", "BPF_MAP_UPDATE_ELEM",
			[]patch{choose(2, "value"), replace(pointer, "key", "value")},
		},
		{
			"MapDeleteElem", retError, "map_elem", "BPF_MAP_DELETE_ELEM",
			[]patch{choose(2, "value"), replace(pointer, "key", "value")},
		},
		{
			"MapGetNextKey", retError, "map_elem", "BPF_MAP_GET_NEXT_KEY",
			[]patch{
				choose(2, "next_key"), replace(pointer, "key", "next_key"),
				truncateAfter("next_key"),
			},
		},
		{
			"MapFreeze", retError, "map_elem", "BPF_MAP_FREEZE",
			[]patch{truncateAfter("map_fd")},
		},
		{
			"MapLookupBatch", retError, "map_elem_batch", "BPF_MAP_LOOKUP_BATCH",
			[]patch{replace(pointer, "in_batch", "out_batch", "keys", "values")},
		},
		{
			"MapLookupAndDeleteBatch", retError, "map_elem_batch", "BPF_MAP_LOOKUP_AND_DELETE_BATCH",
			[]patch{replace(pointer, "in_batch", "out_batch", "keys", "values")},
		},
		{
			"MapUpdateBatch", retError, "map_elem_batch", "BPF_MAP_UPDATE_BATCH",
			[]patch{replace(pointer, "in_batch", "out_batch", "keys", "values")},
		},
		{
			"MapDeleteBatch", retError, "map_elem_batch", "BPF_MAP_DELETE_BATCH",
			[]patch{replace(pointer, "in_batch", "out_batch", "keys", "values")},
		},
		{
			"ProgLoad", retFd, "prog_load", "BPF_PROG_LOAD",
			[]patch{
				replace(objName, "prog_name"),
				replace(enumTypes["ProgType"], "prog_type"),
				replace(enumTypes["AttachType"], "expected_attach_type"),
				replace(logLevel, "log_level"),
				replace(pointer,
					"insns",
					"license",
					"log_buf",
					"func_info",
					"line_info",
					"fd_array",
					"core_relos",
				),
				replace(typeID, "attach_btf_id"),
				choose(20, "attach_btf_obj_fd"),
			},
		},
		{
			"ProgBindMap", retError, "prog_bind_map", "BPF_PROG_BIND_MAP",
			nil,
		},
		{
			"ObjPin", retError, "obj_pin", "BPF_OBJ_PIN",
			[]patch{replace(pointer, "pathname")},
		},
		{
			"ObjGet", retFd, "obj_pin", "BPF_OBJ_GET",
			[]patch{replace(pointer, "pathname")},
		},
		{
			"ProgAttach", retError, "prog_attach", "BPF_PROG_ATTACH",
			nil,
		},
		{
			"ProgDetach", retError, "prog_attach", "BPF_PROG_DETACH",
			[]patch{truncateAfter("attach_type")},
		},
		{
			"ProgRun", retError, "prog_run", "BPF_PROG_TEST_RUN",
			[]patch{replace(pointer, "data_in", "data_out", "ctx_in", "ctx_out")},
		},
		{
			"ProgGetNextId", retError, "obj_next_id", "BPF_PROG_GET_NEXT_ID",
			[]patch{
				choose(0, "start_id"), rename("start_id", "id"),
				truncateAfter("next_id"),
			},
		},
		{
			"MapGetNextId", retError, "obj_next_id", "BPF_MAP_GET_NEXT_ID",
			[]patch{
				choose(0, "start_id"), rename("start_id", "id"),
				truncateAfter("next_id"),
			},
		},
		{
			"BtfGetNextId", retError, "obj_next_id", "BPF_BTF_GET_NEXT_ID",
			[]patch{
				choose(0, "start_id"), rename("start_id", "id"),
				replace(btfID, "id", "next_id"),
				truncateAfter("next_id"),
			},
		},
		// These piggy back on the obj_next_id decl, but only support the
		// first field...
		{
			"BtfGetFdById", retFd, "obj_next_id", "BPF_BTF_GET_FD_BY_ID",
			[]patch{choose(0, "start_id"), rename("start_id", "id"), truncateAfter("id")},
		},
		{
			"MapGetFdById", retFd, "obj_next_id", "BPF_MAP_GET_FD_BY_ID",
			[]patch{choose(0, "start_id"), rename("start_id", "id"), truncateAfter("id")},
		},
		{
			"ProgGetFdById", retFd, "obj_next_id", "BPF_PROG_GET_FD_BY_ID",
			[]patch{choose(0, "start_id"), rename("start_id", "id"), truncateAfter("id")},
		},
		{
			"ObjGetInfoByFd", retError, "info_by_fd", "BPF_OBJ_GET_INFO_BY_FD",
			[]patch{replace(pointer, "info")},
		},
		{
			"RawTracepointOpen", retFd, "raw_tracepoint_open", "BPF_RAW_TRACEPOINT_OPEN",
			[]patch{replace(pointer, "name")},
		},
		{
			"BtfLoad", retFd, "btf_load", "BPF_BTF_LOAD",
			[]patch{replace(pointer, "btf", "btf_log_buf")},
		},
		{
			"LinkCreate", retFd, "link_create", "BPF_LINK_CREATE",
			[]patch{
				replace(enumTypes["AttachType"], "attach_type"),
				choose(4, "target_btf_id"),
				replace(typeID, "target_btf_id"),
			},
		},
		{
			"LinkCreateIter", retFd, "link_create", "BPF_LINK_CREATE",
			[]patch{
				chooseNth(4, 1),
				replace(enumTypes["AttachType"], "attach_type"),
				flattenAnon,
				replace(pointer, "iter_info"),
			},
		},
		{
			"LinkCreatePerfEvent", retFd, "link_create", "BPF_LINK_CREATE",
			[]patch{
				chooseNth(4, 2),
				replace(enumTypes["AttachType"], "attach_type"),
				flattenAnon,
			},
		},
		{
			"LinkCreateKprobeMulti", retFd, "link_create", "BPF_LINK_CREATE",
			[]patch{
				chooseNth(4, 3),
				replace(enumTypes["AttachType"], "attach_type"),
				modify(func(m *btf.Member) error {
					return rename("flags", "kprobe_multi_flags")(m.Type.(*btf.Struct))
				}, "kprobe_multi"),
				flattenAnon,
				replace(pointer, "cookies"),
				replace(pointer, "addrs"),
				replace(pointer, "syms"),
				rename("cnt", "count"),
			},
		},
		{
			"LinkCreateTracing", retFd, "link_create", "BPF_LINK_CREATE",
			[]patch{
				chooseNth(4, 4),
				replace(enumTypes["AttachType"], "attach_type"),
				flattenAnon,
				replace(btfID, "target_btf_id"),
			},
		},
		{
			"LinkUpdate", retError, "link_update", "BPF_LINK_UPDATE",
			nil,
		},
		{
			"EnableStats", retFd, "enable_stats", "BPF_ENABLE_STATS",
			nil,
		},
		{
			"IterCreate", retFd, "iter_create", "BPF_ITER_CREATE",
			nil,
		},
		{
			"ProgQuery", retError, "prog_query", "BPF_PROG_QUERY",
			[]patch{
				replace(enumTypes["AttachType"], "attach_type"),
				replace(pointer, "prog_ids"),
				rename("prog_cnt", "prog_count"),
			},
		},
	}

	sort.Slice(attrs, func(i, j int) bool {
		return attrs[i].goType < attrs[j].goType
	})

	var bpfAttr *btf.Union
	if err := spec.TypeByName("bpf_attr", &bpfAttr); err != nil {
		return nil, err
	}

	// The order of this list must match the order of the members of the
	// bpf_attr union, splitUnion matches them up by index.
	attrTypes, err := splitUnion(bpfAttr, types{
		{"map_create", "map_type"},
		{"map_elem", "map_fd"},
		{"map_elem_batch", "batch"},
		{"prog_load", "prog_type"},
		{"obj_pin", "pathname"},
		{"prog_attach", "target_fd"},
		{"prog_run", "test"},
		{"obj_next_id", ""},
		{"info_by_fd", "info"},
		{"prog_query", "query"},
		{"raw_tracepoint_open", "raw_tracepoint"},
		{"btf_load", "btf"},
		{"task_fd_query", "task_fd_query"},
		{"link_create", "link_create"},
		{"link_update", "link_update"},
		{"link_detach", "link_detach"},
		{"enable_stats", "enable_stats"},
		{"iter_create", "iter_create"},
		{"prog_bind_map", "prog_bind_map"},
	})
	if err != nil {
		return nil, fmt.Errorf("splitting bpf_attr: %w", err)
	}

	for _, s := range attrs {
		fmt.Println("attr", s.goType)

		t := attrTypes[s.cType]
		if t == nil {
			return nil, fmt.Errorf("unknown attr %q", s.cType)
		}

		goAttrType := s.goType + "Attr"
		if err := outputPatchedStruct(gf, w, goAttrType, t, s.patches); err != nil {
			return nil, fmt.Errorf("output %q: %w", goAttrType, err)
		}

		// Emit the wrapper around BPF() matching the declared return kind.
		switch s.ret {
		case retError:
			fmt.Fprintf(w, "func %s(attr *%s) error { _, err := BPF(%s, unsafe.Pointer(attr), unsafe.Sizeof(*attr)); return err }\n\n", s.goType, goAttrType, s.cmd)
		case retFd:
			fmt.Fprintf(w, "func %s(attr *%s) (*FD, error) { fd, err := BPF(%s, unsafe.Pointer(attr), unsafe.Sizeof(*attr)); if err != nil { return nil, err }; return NewFD(int(fd)) }\n\n", s.goType, goAttrType, s.cmd)
		}
	}

	// Link info type specific
	linkInfoExtraTypes := []struct {
		goType  string
		cType   string
		patches []patch
	}{
		{"CgroupLinkInfo", "cgroup", []patch{replace(enumTypes["AttachType"], "attach_type")}},
		{"IterLinkInfo", "iter", []patch{replace(pointer, "target_name"), truncateAfter("target_name_len")}},
		{"NetNsLinkInfo", "netns", []patch{replace(enumTypes["AttachType"], "attach_type")}},
		{"RawTracepointLinkInfo", "raw_tracepoint", []patch{replace(pointer, "tp_name")}},
		{"TracingLinkInfo", "tracing", []patch{
			replace(enumTypes["AttachType"], "attach_type"),
			replace(typeID, "target_btf_id")},
		},
		{"XDPLinkInfo", "xdp", nil},
	}

	sort.Slice(linkInfoExtraTypes, func(i, j int) bool {
		return linkInfoExtraTypes[i].goType < linkInfoExtraTypes[j].goType
	})

	var bpfLinkInfo *btf.Struct
	if err := spec.TypeByName("bpf_link_info", &bpfLinkInfo); err != nil {
		return nil, err
	}

	// The type specific union is expected to be the last member. Guard
	// against an empty struct instead of panicking on the index below.
	if len(bpfLinkInfo.Members) == 0 {
		return nil, fmt.Errorf("bpf_link_info has no members")
	}

	member := bpfLinkInfo.Members[len(bpfLinkInfo.Members)-1]
	bpfLinkInfoUnion, ok := member.Type.(*btf.Union)
	if !ok {
		return nil, fmt.Errorf("there is no type-specific union")
	}

	linkInfoTypes, err := splitUnion(bpfLinkInfoUnion, types{
		{"raw_tracepoint", "raw_tracepoint"},
		{"tracing", "tracing"},
		{"cgroup", "cgroup"},
		{"iter", "iter"},
		{"netns", "netns"},
		{"xdp", "xdp"},
	})
	if err != nil {
		return nil, fmt.Errorf("splitting linkInfo: %w", err)
	}

	for _, s := range linkInfoExtraTypes {
		// Mirror the check done for attrs: a cType that splitUnion did
		// not produce is a mistake in the table above.
		t := linkInfoTypes[s.cType]
		if t == nil {
			return nil, fmt.Errorf("unknown link info type %q", s.cType)
		}

		if err := outputPatchedStruct(gf, w, s.goType, t, s.patches); err != nil {
			return nil, fmt.Errorf("output %q: %w", s.goType, err)
		}
	}

	return w.Bytes(), nil
}
// outputPatchedStruct writes the Go declaration of s to w under the name id.
//
// The patches are applied in order to a copy of s, so s itself is left
// untouched. A failing patch is reported together with its index. The
// declaration is followed by an empty line.
func outputPatchedStruct(gf *btf.GoFormatter, w *bytes.Buffer, id string, s *btf.Struct, patches []patch) error {
	patched := btf.Copy(s, nil).(*btf.Struct)

	for idx := range patches {
		if err := patches[idx](patched); err != nil {
			return fmt.Errorf("patch %d: %w", idx, err)
		}
	}

	decl, err := gf.TypeDeclaration(id, patched)
	if err != nil {
		return err
	}

	w.WriteString(decl + "\n\n")
	return nil
}
// types is the ordered list of expectations handed to splitUnion. The entry
// at index i describes the union member at index i.
type types []struct {
// name is the key under which splitUnion stores the member's struct.
name string
// cFieldOrFirstMember is the expected name of the union member or, if the
// member is anonymous, the expected name of the first field of its struct.
cFieldOrFirstMember string
}
// splitUnion maps the leading members of union to the names given in types.
//
// Entry i of types is matched against member i of the union. Each matched
// member must be a struct. A named member must be called cFieldOrFirstMember;
// an anonymous member is identified by the name of its first field instead.
//
// Returns the structs keyed by types[i].name, or an error if the union has
// fewer members than types or a member doesn't match its expectation.
func splitUnion(union *btf.Union, types types) (map[string]*btf.Struct, error) {
	// Fail with a descriptive error instead of an index out of range panic
	// if the union is shorter than the table of expectations.
	if len(types) > len(union.Members) {
		return nil, fmt.Errorf("union has %d members, expected at least %d", len(union.Members), len(types))
	}

	structs := make(map[string]*btf.Struct, len(types))
	for i, t := range types {
		member := union.Members[i]
		s, ok := member.Type.(*btf.Struct)
		if !ok {
			return nil, fmt.Errorf("%q: %s is not a struct", t.name, member.Type)
		}

		if member.Name == "" {
			// This is an anonymous struct, check the name of the first member instead.
			if len(s.Members) == 0 {
				return nil, fmt.Errorf("%q: anonymous struct has no members", t.name)
			}
			if name := s.Members[0].Name; name != t.cFieldOrFirstMember {
				return nil, fmt.Errorf("first field of %q is %q, not %q", t.name, name, t.cFieldOrFirstMember)
			}
		} else if member.Name != t.cFieldOrFirstMember {
			return nil, fmt.Errorf("name for %q is %q, not %q", t.name, member.Name, t.cFieldOrFirstMember)
		}

		structs[t.name] = s
	}

	return structs, nil
}
// patch modifies a struct in place. outputPatchedStruct applies patches to a
// copy of a BTF struct before generating its Go declaration.
type patch func(*btf.Struct) error
// modify returns a patch which calls fn on every member of the struct whose
// name is listed in members.
//
// The patch stops at the first error returned by fn. It also fails if one or
// more of the requested members don't exist; the error lists them in sorted
// order.
func modify(fn func(*btf.Member) error, members ...string) patch {
	return func(s *btf.Struct) error {
		pending := make(map[string]bool)
		for _, memberName := range members {
			pending[memberName] = true
		}

		for i := range s.Members {
			// Remember the name up front, it identifies the entry to
			// tick off once fn has run.
			memberName := s.Members[i].Name
			if !pending[memberName] {
				continue
			}

			if err := fn(&s.Members[i]); err != nil {
				return err
			}
			delete(pending, memberName)
		}

		if len(pending) == 0 {
			return nil
		}

		var missing []string
		for memberName := range pending {
			missing = append(missing, memberName)
		}
		sort.Strings(missing)

		return fmt.Errorf("missing members: %v", strings.Join(missing, ", "))
	}
}
// modifyNth returns a patch which calls fn on the members at the given
// zero-based indices, in the order they are listed.
//
// The patch fails if an index is negative or past the last member, or if fn
// returns an error. Errors from fn are wrapped with the member index.
func modifyNth(fn func(*btf.Member) error, indices ...int) patch {
	return func(s *btf.Struct) error {
		for _, i := range indices {
			// Reject negative indices as well, they would otherwise
			// panic when indexing s.Members.
			if i < 0 || i >= len(s.Members) {
				return fmt.Errorf("index %d is out of bounds", i)
			}

			if err := fn(&s.Members[i]); err != nil {
				return fmt.Errorf("member #%d: %w", i, err)
			}
		}
		return nil
	}
}
// replace returns a patch which sets the type of each named member to t.
//
// The patch fails if any of the members doesn't exist, see modify.
func replace(t btf.Type, members ...string) patch {
	setType := func(m *btf.Member) error {
		m.Type = t
		return nil
	}
	return modify(setType, members...)
}
// choose returns a patch which replaces the union at index member with the
// union's member called name.
//
// Both the name and the type of the struct member are taken from the chosen
// union member. The patch fails if the struct member isn't a union or if the
// union has no member with that name.
func choose(member int, name string) patch {
	pick := func(m *btf.Member) error {
		union, isUnion := m.Type.(*btf.Union)
		if !isUnion {
			return fmt.Errorf("member %d is %s, not a union", member, m.Type)
		}

		for i := range union.Members {
			candidate := union.Members[i]
			if candidate.Name != name {
				continue
			}

			m.Name = candidate.Name
			m.Type = candidate.Type
			return nil
		}

		return fmt.Errorf("%s has no member %q", union, name)
	}
	return modifyNth(pick, member)
}
// chooseNth returns a patch which replaces the union at index member with the
// n-th (zero-based) member of that union.
//
// Both the name and the type of the struct member are taken from the chosen
// union member. The patch fails if the struct member isn't a union or if n is
// negative or past the last member of the union.
func chooseNth(member int, n int) patch {
	return modifyNth(func(m *btf.Member) error {
		union, ok := m.Type.(*btf.Union)
		if !ok {
			return fmt.Errorf("member %d is %s, not a union", member, m.Type)
		}

		// Reject negative n as well, it would otherwise panic when
		// indexing union.Members.
		if n < 0 || n >= len(union.Members) {
			return fmt.Errorf("member %d is out of bounds", n)
		}

		um := union.Members[n]
		m.Name = um.Name
		m.Type = um.Type
		return nil
	}, member)
}
// flattenAnon replaces every member of s whose type is an unnamed struct with
// the members of that struct.
//
// The offsets of the inlined members are adjusted by the offset of the member
// they replace, so that they remain relative to the start of s. Unnamed
// structs nested inside an inlined struct are flattened as well. The inlined
// struct itself is not modified.
//
// Always returns nil.
func flattenAnon(s *btf.Struct) error {
	// The length of s.Members changes while iterating, so it has to be
	// re-evaluated on every iteration. A range loop would fix the number
	// of iterations up front and skip members at the tail.
	for i := 0; i < len(s.Members); i++ {
		m := s.Members[i]

		cs, ok := m.Type.(*btf.Struct)
		if !ok || cs.TypeName() != "" {
			continue
		}

		flattened := make([]btf.Member, 0, len(s.Members)+len(cs.Members)-1)
		flattened = append(flattened, s.Members[:i]...)
		for _, cm := range cs.Members {
			// cm is a copy, adjusting it leaves cs untouched.
			cm.Offset += m.Offset
			flattened = append(flattened, cm)
		}
		flattened = append(flattened, s.Members[i+1:]...)
		s.Members = flattened

		// Index i now holds the first inlined member (or the next
		// original member if cs was empty). Look at it again, since
		// it may be an unnamed struct as well.
		i--
	}

	return nil
}
// truncateAfter returns a patch which drops all members following the first
// member called name.
//
// The size of the struct is set to the end of that member, i.e. its offset in
// bytes plus its size. The patch fails if there is no such member or if the
// size of the member can't be determined.
func truncateAfter(name string) patch {
	return func(s *btf.Struct) error {
		idx := -1
		for i := range s.Members {
			if s.Members[i].Name == name {
				idx = i
				break
			}
		}
		if idx < 0 {
			return fmt.Errorf("no member %q", name)
		}

		last := s.Members[idx]
		size, err := btf.Sizeof(last.Type)
		if err != nil {
			return err
		}

		s.Members = s.Members[:idx+1]
		s.Size = last.Offset.Bytes() + uint32(size)
		return nil
	}
}
// rename returns a patch which changes the name of the first member called
// from into to.
//
// The patch fails if the struct has no member called from.
func rename(from, to string) patch {
	return func(s *btf.Struct) error {
		for i := range s.Members {
			if field := &s.Members[i]; field.Name == from {
				field.Name = to
				return nil
			}
		}
		return fmt.Errorf("no member named %q", from)
	}
}
// name returns a patch which gives the anonymous member at index member the
// given name.
//
// The patch fails if the member already has a name.
func name(member int, name string) patch {
	assign := func(m *btf.Member) error {
		if existing := m.Name; existing != "" {
			return fmt.Errorf("member already has name %q", existing)
		}

		m.Name = name
		return nil
	}
	return modifyNth(assign, member)
}
// replaceWithBytes returns a patch which replaces the type of each named
// member with an array of one-byte integers of the same size.
//
// The patch fails if a member is a bitfield, if its size can't be determined,
// or if a member doesn't exist, see modify.
func replaceWithBytes(members ...string) patch {
	toBytes := func(m *btf.Member) error {
		if m.BitfieldSize != 0 {
			return errors.New("replaceWithBytes: member is a bitfield")
		}

		n, err := btf.Sizeof(m.Type)
		if err != nil {
			return fmt.Errorf("replaceWithBytes: size of %s: %w", m.Type, err)
		}

		byteType := &btf.Int{Size: 1}
		m.Type = &btf.Array{Type: byteType, Nelems: uint32(n)}
		return nil
	}
	return modify(toBytes, members...)
}
@@ -0,0 +1,51 @@
package internal
import (
"fmt"
"os"
"strings"
)
// PossibleCPUs returns the max number of CPUs a system may possibly have.
//
// The value is parsed from /sys/devices/system/cpu/possible. Logical CPU
// numbers must be of the form 0-n.
var PossibleCPUs = Memoize(func() (int, error) {
return parseCPUsFromFile("/sys/devices/system/cpu/possible")
})
// parseCPUsFromFile reads the CPU list stored in the file at path and returns
// the number of CPUs it describes, see parseCPUs.
//
// Errors from reading the file are returned as is. Errors from parsing are
// prefixed with the path.
func parseCPUsFromFile(path string) (int, error) {
	contents, readErr := os.ReadFile(path)
	if readErr != nil {
		return 0, readErr
	}

	count, parseErr := parseCPUs(string(contents))
	if parseErr != nil {
		return 0, fmt.Errorf("can't parse %s: %v", path, parseErr)
	}

	return count, nil
}
// parseCPUs parses the number of cpus from a string produced
// by bitmap_list_string() in the Linux kernel.
//
// The accepted inputs are "0", which denotes a single CPU, and a single range
// "0-n", which denotes n+1 CPUs. Either may be followed by a newline.
// Multiple ranges are rejected, since they can't be unified
// into a single number. Ranges which don't start at zero or whose upper bound
// is below the lower bound are rejected as well.
//
// This is the format of /sys/devices/system/cpu/possible, it
// is not suitable for /sys/devices/system/cpu/online, etc.
func parseCPUs(spec string) (int, error) {
	if strings.Trim(spec, "\n") == "0" {
		return 1, nil
	}

	var low, high int
	n, err := fmt.Sscanf(spec, "%d-%d\n", &low, &high)
	if n != 2 || err != nil {
		return 0, fmt.Errorf("invalid format: %s", spec)
	}
	if low != 0 {
		return 0, fmt.Errorf("CPU spec doesn't start at zero: %s", spec)
	}
	// %d accepts a sign, so an input like "0--1" scans successfully with a
	// negative upper bound. Don't turn that into a bogus CPU count.
	if high < low {
		return 0, fmt.Errorf("CPU spec range is inverted: %s", spec)
	}

	// cpus is 0 indexed
	return high + 1, nil
}
@@ -0,0 +1,32 @@
package internal
import (
"testing"
)
// TestParseCPUs checks that parseCPUs accepts "0" and a single range starting
// at zero, and that it rejects malformed specs.
func TestParseCPUs(t *testing.T) {
	valid := map[string]int{
		"0-1":   2,
		"0-2\n": 3,
		"0":     1,
	}
	for spec, want := range valid {
		got, err := parseCPUs(spec)
		switch {
		case err != nil:
			t.Errorf("Can't parse `%s`: %v", spec, err)
		case got != want:
			t.Error("Parsing", spec, "returns", got, "instead of", want)
		}
	}

	invalid := []string{
		"0,3-4",
		"0-",
		"1,",
		"",
	}
	for _, spec := range invalid {
		if _, err := parseCPUs(spec); err == nil {
			t.Error("Parsed invalid format:", spec)
		}
	}
}
@@ -0,0 +1,91 @@
package internal
import "math/bits"
// Deque implements a double ended queue.
//
// Elements live in a ring buffer whose length is a power of two. read and
// write are positions which are turned into indices into elems by masking
// them with mask. The zero value is an empty deque ready for use.
type Deque[T any] struct {
	elems       []T
	read, write uint64
	mask        uint64
}

// Reset removes all elements from the deque. The backing buffer is kept, with
// the occupied slots set to the zero value.
func (dq *Deque[T]) Reset() {
	var zero T

	for pos := dq.read; pos < dq.write; pos++ {
		dq.elems[pos&dq.mask] = zero
	}

	dq.read = 0
	dq.write = 0
}

// Empty returns true if the deque doesn't contain any elements.
func (dq *Deque[T]) Empty() bool {
	return dq.write == dq.read
}

// Push appends an element at the end, growing the deque if necessary.
func (dq *Deque[T]) Push(e T) {
	dq.Grow(1)

	slot := dq.write & dq.mask
	dq.elems[slot] = e
	dq.write++
}

// Shift removes and returns the first element. It returns the zero value if
// the deque is empty.
func (dq *Deque[T]) Shift() T {
	var zero T

	if dq.read == dq.write {
		return zero
	}

	slot := &dq.elems[dq.read&dq.mask]
	first := *slot
	// Clear the slot so that the deque doesn't hold on to the element.
	*slot = zero
	dq.read++

	return first
}

// Pop removes and returns the last element. It returns the zero value if the
// deque is empty.
func (dq *Deque[T]) Pop() T {
	var zero T

	if dq.read == dq.write {
		return zero
	}

	dq.write--
	slot := &dq.elems[dq.write&dq.mask]
	last := *slot
	// Clear the slot so that the deque doesn't hold on to the element.
	*slot = zero

	return last
}

// Grow makes sure that the deque has room for n more elements, increasing its
// capacity if that is not the case already.
//
// Panics if the required capacity overflows.
func (dq *Deque[T]) Grow(n int) {
	used := dq.write - dq.read
	required := used + uint64(n)
	if required < used {
		panic("overflow")
	}

	if required <= uint64(len(dq.elems)) {
		return
	}

	// Round up to the next power of two, but allocate at least 8 slots.
	// See https://jameshfisher.com/2018/03/30/round-up-power-2/
	newCap := 1 << bits.Len64(required-1)
	if newCap < 8 {
		newCap = 8
	}

	// Unroll the ring into the new buffer: first the part from the read
	// index to the end of the old buffer, then the part that wrapped around.
	grown := make([]T, used, newCap)
	start := dq.read & dq.mask
	head := copy(grown, dq.elems[start:])
	copy(grown[head:], dq.elems[:start])

	dq.elems = grown[:newCap]
	dq.mask = uint64(newCap) - 1
	dq.read, dq.write = 0, used
}
@@ -0,0 +1,82 @@
package internal
import "testing"
// TestDeque exercises Deque: popping and shifting including on an empty
// deque, pushing past the initial capacity, and growing explicitly.
func TestDeque(t *testing.T) {
	t.Run("pop", func(t *testing.T) {
		var dq Deque[int]
		dq.Push(1)
		dq.Push(2)

		if dq.Pop() != 2 {
			t.Error("Didn't pop 2 first")
		}

		if dq.Pop() != 1 {
			t.Error("Didn't pop 1 second")
		}

		if dq.Pop() != 0 {
			t.Error("Didn't pop zero")
		}
	})

	t.Run("shift", func(t *testing.T) {
		var td Deque[int]
		td.Push(1)
		td.Push(2)

		if td.Shift() != 1 {
			t.Error("Didn't shift 1 first")
		}

		if td.Shift() != 2 {
			t.Error("Didn't shift 2 second")
		}

		if td.Shift() != 0 {
			t.Error("Didn't shift zero")
		}
	})

	t.Run("push", func(t *testing.T) {
		var td Deque[int]
		td.Push(1)
		td.Push(2)
		td.Shift()

		// Push more elements than fit into the current buffer while
		// the read position is not at the start.
		for i := 1; i <= 12; i++ {
			td.Push(i)
		}

		if td.Shift() != 2 {
			t.Error("Didn't shift 2 first")
		}
		for i := 1; i <= 12; i++ {
			if v := td.Shift(); v != i {
				t.Fatalf("Shifted %d at pos %d", v, i)
			}
		}
	})

	t.Run("grow", func(t *testing.T) {
		var td Deque[int]
		td.Push(1)
		td.Push(2)
		td.Push(3)
		td.Shift()

		// Two elements remain, so room for seven more requires at
		// least nine slots.
		td.Grow(7)
		if len(td.elems) < 9 {
			t.Fatal("Expected at least 9 elements, got", len(td.elems))
		}
		if cap(td.elems)&(cap(td.elems)-1) != 0 {
			t.Fatalf("Capacity %d is not a power of two", cap(td.elems))
		}
		if td.Shift() != 2 || td.Shift() != 3 {
			t.Fatal("Elements don't match after grow")
		}
	})
}
@@ -0,0 +1,102 @@
package internal
import (
"debug/elf"
"fmt"
"io"
)
// SafeELFFile wraps an *elf.File. Its Symbols and DynamicSymbols methods turn
// panics raised while reading symbols into errors.
type SafeELFFile struct {
*elf.File
}
// NewSafeELFFile parses an ELF from r without letting a panic escape.
//
// A panic raised while parsing is converted into an error. This is necessary
// since there are a bunch of unfixed bugs in debug/elf.
//
// https://github.com/golang/go/issues?q=is%3Aissue+is%3Aopen+debug%2Felf+in%3Atitle
func NewSafeELFFile(r io.ReaderAt) (safe *SafeELFFile, err error) {
	defer func() {
		if panicked := recover(); panicked != nil {
			// Overwrite the named results, the function is
			// returning because of the panic.
			safe, err = nil, fmt.Errorf("reading ELF file panicked: %s", panicked)
		}
	}()

	parsed, err := elf.NewFile(r)
	if err != nil {
		return nil, err
	}

	return &SafeELFFile{parsed}, nil
}
// OpenSafeELFFile parses the ELF stored in the file at path.
//
// It behaves like NewSafeELFFile, except that calling safe.Close also closes
// the underlying file.
func OpenSafeELFFile(path string) (safe *SafeELFFile, err error) {
	defer func() {
		if panicked := recover(); panicked != nil {
			// Overwrite the named results, the function is
			// returning because of the panic.
			safe, err = nil, fmt.Errorf("reading ELF file panicked: %s", panicked)
		}
	}()

	opened, err := elf.Open(path)
	if err != nil {
		return nil, err
	}

	return &SafeELFFile{opened}, nil
}
// Symbols is the safe version of elf.File.Symbols.
//
// A panic raised while reading the symbols is returned as an error.
func (se *SafeELFFile) Symbols() (syms []elf.Symbol, err error) {
	defer func() {
		if panicked := recover(); panicked != nil {
			syms, err = nil, fmt.Errorf("reading ELF symbols panicked: %s", panicked)
		}
	}()

	return se.File.Symbols()
}
// DynamicSymbols is the safe version of elf.File.DynamicSymbols.
//
// A panic raised while reading the symbols is returned as an error.
func (se *SafeELFFile) DynamicSymbols() (syms []elf.Symbol, err error) {
	defer func() {
		if panicked := recover(); panicked != nil {
			syms, err = nil, fmt.Errorf("reading ELF dynamic symbols panicked: %s", panicked)
		}
	}()

	return se.File.DynamicSymbols()
}
// SectionsByType returns the sections of the file which have the given
// section type, in file order. The result is empty but non-nil if there is no
// such section.
func (se *SafeELFFile) SectionsByType(typ elf.SectionType) []*elf.Section {
	matches := make([]*elf.Section, 0, 1)
	for i := range se.Sections {
		if sec := se.Sections[i]; sec.Type == typ {
			matches = append(matches, sec)
		}
	}
	return matches
}
@@ -0,0 +1,12 @@
//go:build armbe || arm64be || mips || mips64 || mips64p32 || ppc64 || s390 || s390x || sparc || sparc64
package internal
import "encoding/binary"
// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
// depending on the host's endianness. This file is only built for the
// architectures listed in its build constraint, for which it is big endian.
var NativeEndian binary.ByteOrder = binary.BigEndian
// ClangEndian is set to either "el" or "eb" depending on the host's endianness.
// It is "eb" for the architectures this file is built for.
const ClangEndian = "eb"
@@ -0,0 +1,12 @@
//go:build 386 || amd64 || amd64p32 || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || ppc64le || riscv64
package internal
import "encoding/binary"
// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
// depending on the host's endianness. This file is only built for the
// architectures listed in its build constraint, for which it is little endian.
var NativeEndian binary.ByteOrder = binary.LittleEndian
// ClangEndian is set to either "el" or "eb" depending on the host's endianness.
// It is "el" for the architectures this file is built for.
const ClangEndian = "el"
@@ -0,0 +1,225 @@
package epoll
import (
"fmt"
"math"
"os"
"runtime"
"sync"
"time"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/unix"
)
// Poller waits for readiness notifications from multiple file descriptors.
//
// The wait can be interrupted by calling Close.
type Poller struct {
// mutexes protect the fields declared below them. If you need to
// acquire both at once you must lock epollMu before eventMu.
epollMu sync.Mutex
// epollFd is the epoll file descriptor. Close sets it to -1.
epollFd int
eventMu sync.Mutex
// event is the eventfd which Close writes to in order to interrupt Wait.
// Close sets it to nil.
event *eventFd
}
// New creates a Poller.
//
// The poller owns an epoll file descriptor and an eventfd which is registered
// with id 0 and used to interrupt Wait. Both are released again if setting up
// the poller fails. A finalizer closes pollers which are garbage collected
// without Close having been called.
func New() (*Poller, error) {
	epollFd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
	if err != nil {
		return nil, fmt.Errorf("create epoll fd: %v", err)
	}

	event, err := newEventFd()
	if err != nil {
		unix.Close(epollFd)
		return nil, err
	}

	p := &Poller{epollFd: epollFd, event: event}
	if err := p.Add(event.raw, 0); err != nil {
		unix.Close(epollFd)
		event.close()
		return nil, fmt.Errorf("add eventfd: %w", err)
	}

	runtime.SetFinalizer(p, (*Poller).Close)
	return p, nil
}
// Close the poller.
//
// Interrupts any calls to Wait. Multiple calls to Close are valid, but subsequent
// calls will return os.ErrClosed.
func (p *Poller) Close() error {
// Remove the finalizer installed by New, the poller is closed explicitly.
runtime.SetFinalizer(p, nil)
// Interrupt Wait() via the event fd if it's currently blocked.
// This is also what makes repeated calls fail: wakeWait returns an error
// wrapping os.ErrClosed once p.event has been set to nil below.
if err := p.wakeWait(); err != nil {
return err
}
// Acquire the lock. This ensures that Wait isn't running.
p.epollMu.Lock()
defer p.epollMu.Unlock()
// Prevent other calls to Close().
p.eventMu.Lock()
defer p.eventMu.Unlock()
// Errors from closing the file descriptors are not reported.
if p.epollFd != -1 {
unix.Close(p.epollFd)
p.epollFd = -1
}
if p.event != nil {
p.event.close()
p.event = nil
}
return nil
}
// Add an fd to the poller.
//
// id is returned by Wait in the unix.EpollEvent.Pad field and may be zero. It
// must fit into an int32, that is it must neither exceed math.MaxInt32 nor be
// less than math.MinInt32.
//
// Returns an error wrapping os.ErrClosed if the poller has been closed.
//
// Add is blocked by Wait.
func (p *Poller) Add(fd int, id int) error {
	// The id is stored in an int32 below. Reject values which would be
	// truncated silently, in either direction.
	if v := int64(id); v < math.MinInt32 || v > math.MaxInt32 {
		return fmt.Errorf("unsupported id: %d", id)
	}

	p.epollMu.Lock()
	defer p.epollMu.Unlock()

	if p.epollFd == -1 {
		return fmt.Errorf("epoll add: %w", os.ErrClosed)
	}

	// The representation of EpollEvent isn't entirely accurate.
	// Pad is fully useable, not just padding. Hence we stuff the
	// id in there, which allows us to identify the event later (e.g.,
	// in case of perf events, which CPU sent it).
	event := unix.EpollEvent{
		Events: unix.EPOLLIN,
		Fd:     int32(fd),
		Pad:    int32(id),
	}

	if err := unix.EpollCtl(p.epollFd, unix.EPOLL_CTL_ADD, fd, &event); err != nil {
		return fmt.Errorf("add fd to epoll: %v", err)
	}

	return nil
}
// Wait for events.
//
// events receives the pending events. A zero deadline means that Wait blocks
// without a timeout.
//
// Returns the number of pending events or an error wrapping os.ErrClosed if
// Close is called, or os.ErrDeadlineExceeded if EpollWait times out.
func (p *Poller) Wait(events []unix.EpollEvent, deadline time.Time) (int, error) {
// Held for the whole call, which is why Add is blocked by Wait and why
// Close has to wake Wait up before taking this lock.
p.epollMu.Lock()
defer p.epollMu.Unlock()
if p.epollFd == -1 {
return 0, fmt.Errorf("epoll wait: %w", os.ErrClosed)
}
for {
// A timeout of -1 is used when there is no deadline.
timeout := int(-1)
if !deadline.IsZero() {
// The remaining time is recomputed on every iteration, so a retry
// after an interrupted syscall doesn't extend the deadline.
msec := time.Until(deadline).Milliseconds()
if msec < 0 {
// Deadline is in the past.
msec = 0
} else if msec > math.MaxInt {
// Deadline is too far in the future.
msec = math.MaxInt
}
timeout = int(msec)
}
n, err := unix.EpollWait(p.epollFd, events, timeout)
if temp, ok := err.(temporaryError); ok && temp.Temporary() {
// Retry the syscall if we were interrupted, see https://github.com/golang/go/issues/20400
continue
}
if err != nil {
return 0, err
}
// No events and no error means that the timeout elapsed.
if n == 0 {
return 0, fmt.Errorf("epoll wait: %w", os.ErrDeadlineExceeded)
}
for _, event := range events[:n] {
if int(event.Fd) == p.event.raw {
// Since we don't read p.event the event is never cleared and
// we'll keep getting this wakeup until Close() acquires the
// lock and sets p.epollFd = -1.
return 0, fmt.Errorf("epoll wait: %w", os.ErrClosed)
}
}
return n, nil
}
}
// temporaryError is implemented by errors which can report whether they are
// temporary. Wait retries the epoll_wait syscall on such errors.
type temporaryError interface {
Temporary() bool
}
// wakeWait unblocks Wait if it is currently blocked in epoll_wait, by adding
// to the poller's eventfd.
//
// Returns an error wrapping os.ErrClosed if the poller has been closed.
func (p *Poller) wakeWait() error {
	p.eventMu.Lock()
	defer p.eventMu.Unlock()

	if event := p.event; event != nil {
		return event.add(1)
	}

	return fmt.Errorf("epoll wake: %w", os.ErrClosed)
}
// eventFd wraps a Linux eventfd.
//
// An eventfd acts like a counter: writes add to the counter, reads retrieve
// the counter and reset it to zero. Reads also block if the counter is zero.
//
// See man 2 eventfd.
type eventFd struct {
// file wraps the descriptor and is used for reading, writing and closing.
file *os.File
// prefer raw over file.Fd(), since the latter puts the file into blocking
// mode.
raw int
}
// newEventFd creates an eventfd with an initial counter of zero. The
// descriptor is created with the O_CLOEXEC and O_NONBLOCK flags.
func newEventFd() (*eventFd, error) {
	raw, err := unix.Eventfd(0, unix.O_CLOEXEC|unix.O_NONBLOCK)
	if err != nil {
		return nil, err
	}

	return &eventFd{
		file: os.NewFile(uintptr(raw), "event"),
		raw:  raw,
	}, nil
}
// close closes the file wrapping the eventfd and returns the error from doing
// so, if any.
func (efd *eventFd) close() error {
return efd.file.Close()
}
// add increases the counter of the eventfd by n.
//
// The value is written as eight bytes in the byte order of the host.
func (efd *eventFd) add(n uint64) error {
	var buf [8]byte
	// Write the requested increment. A hard-coded 1 here would ignore n.
	internal.NativeEndian.PutUint64(buf[:], n)
	_, err := efd.file.Write(buf[:])
	return err
}
// read reads eight bytes from the eventfd and returns them decoded in the
// byte order of the host, together with the error from reading, if any.
func (efd *eventFd) read() (uint64, error) {
	var counter [8]byte
	_, err := efd.file.Read(counter[:])
	value := internal.NativeEndian.Uint64(counter[:])
	return value, err
}
@@ -0,0 +1,130 @@
package epoll
import (
"errors"
"math"
"os"
"testing"
"time"
"github.com/cilium/ebpf/internal/unix"
)
// TestPoller checks that Wait reports a pending event together with its id,
// that Wait blocks when nothing is pending, and that Close interrupts a
// blocked Wait and returns os.ErrClosed when called a second time.
func TestPoller(t *testing.T) {
t.Parallel()
event, poller := mustNewPoller(t)
// Buffered so that a read goroutine can finish even if nobody is receiving.
done := make(chan struct{}, 1)
// read waits for a single event and signals on done once Wait returns.
// Being interrupted by Close is not treated as a failure.
read := func() {
defer func() {
done <- struct{}{}
}()
events := make([]unix.EpollEvent, 1)
n, err := poller.Wait(events, time.Time{})
if errors.Is(err, os.ErrClosed) {
return
}
if err != nil {
t.Error("Error from wait:", err)
return
}
if n != 1 {
t.Errorf("Got %d instead of 1 events", n)
}
// 42 is the id mustNewPoller passes to Add.
if e := events[0]; e.Pad != 42 {
t.Errorf("Incorrect value in EpollEvent.Pad: %d != 42", e.Pad)
}
}
// Make the eventfd readable, Wait has to return promptly.
if err := event.add(1); err != nil {
t.Fatal(err)
}
go read()
select {
case <-done:
case <-time.After(time.Second):
t.Fatal("Timed out")
}
// Reset the counter so that nothing is pending anymore.
if _, err := event.read(); err != nil {
t.Fatal(err)
}
// With nothing pending Wait has to block.
go read()
select {
case <-done:
t.Fatal("Wait doesn't block")
case <-time.After(time.Second):
}
// Closing has to interrupt the goroutine which is still blocked in Wait.
if err := poller.Close(); err != nil {
t.Fatal("Close returns an error:", err)
}
select {
case <-done:
case <-time.After(time.Second):
t.Fatal("Close doesn't unblock Wait")
}
if err := poller.Close(); !errors.Is(err, os.ErrClosed) {
t.Fatal("Closing a second time doesn't return ErrClosed:", err)
}
}
// TestPollerDeadline checks that Wait fails with os.ErrDeadlineExceeded for a
// deadline in the past, and that a Wait with a far-away deadline is
// interrupted by Close with os.ErrClosed.
func TestPollerDeadline(t *testing.T) {
t.Parallel()
_, poller := mustNewPoller(t)
events := make([]unix.EpollEvent, 1)
_, err := poller.Wait(events, time.Now().Add(-time.Second))
if !errors.Is(err, os.ErrDeadlineExceeded) {
t.Fatal("Expected os.ErrDeadlineExceeded on deadline in the past, got", err)
}
done := make(chan struct{})
go func() {
defer close(done)
// math.MaxInt64 is the largest offset a time.Duration can express.
_, err := poller.Wait(events, time.Now().Add(math.MaxInt64))
if !errors.Is(err, os.ErrClosed) {
t.Error("Expected os.ErrClosed when interrupting deadline, got", err)
}
}()
// Wait for the goroutine to enter the syscall.
time.Sleep(time.Second)
poller.Close()
// Block until the goroutine has reported its result, so that t.Error is
// never called after the test has returned.
<-done
}
// mustNewPoller returns a fresh eventfd together with a Poller that already
// watches it under the id 42.
//
// Both are closed via t.Cleanup. Any setup failure aborts the test.
func mustNewPoller(t *testing.T) (*eventFd, *Poller) {
	t.Helper()

	efd, err := newEventFd()
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(func() { efd.close() })

	p, err := New()
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(func() { p.Close() })

	if addErr := p.Add(efd.raw, 42); addErr != nil {
		t.Fatal("Can't add fd:", addErr)
	}

	return efd, p
}
@@ -0,0 +1,198 @@
package internal
import (
"bytes"
"fmt"
"io"
"strings"
)
// ErrorWithLog builds a VerifierError from err and a raw verifier log buffer.
//
// The buffer is treated as a C string: it is cut at the first NUL byte, then
// surrounding whitespace is removed and the remainder is split into lines.
// An empty log results in a VerifierError with a nil Log.
//
// The default error output is a summary of the full log. The latter can be
// accessed via VerifierError.Log or by formatting the error, see Format.
func ErrorWithLog(source string, err error, log []byte, truncated bool) *VerifierError {
	const whitespace = "\t\r\v\n "

	// Everything from the first 0 byte onwards is not part of the C string.
	if nul := bytes.IndexByte(log, 0); nul >= 0 {
		log = log[:nul]
	}

	log = bytes.Trim(log, whitespace)
	if len(log) == 0 {
		return &VerifierError{source: source, Cause: err, Log: nil, Truncated: truncated}
	}

	rawLines := bytes.Split(log, []byte{'\n'})
	lines := make([]string, len(rawLines))
	for i := range rawLines {
		// Only strip the right hand side: leading white space on individual
		// lines is significant when outputting logs.
		lines[i] = string(bytes.TrimRight(rawLines[i], whitespace))
	}

	return &VerifierError{source: source, Cause: err, Log: lines, Truncated: truncated}
}
// VerifierError includes information from the eBPF verifier.
//
// It summarises the log output, see Format if you want to output the full contents.
type VerifierError struct {
// source is the first argument given to ErrorWithLog. It prefixes every
// rendering of the error.
source string
// The error which caused this error.
Cause error
// The verifier output split into lines.
Log []string
// Whether the log output is truncated, based on several heuristics.
Truncated bool
}
// Unwrap returns Cause, which lets errors.Is and errors.As inspect the
// wrapped error.
func (le *VerifierError) Unwrap() error {
return le.Cause
}
// Error returns a one line summary of the verifier error.
//
// The summary consists of the source, the cause and the last line of the log,
// ignoring a trailing "processed ..." statistics line. The second to last line
// is included as well if the log is truncated or if includePreviousLine
// reports that the last line needs context. A parenthesised suffix states
// whether the log is truncated and how many lines of Log are not shown.
func (le *VerifierError) Error() string {
	summary := le.Log
	if last := len(summary) - 1; last >= 0 && strings.HasPrefix(summary[last], "processed ") {
		// Get rid of "processed 39 insns (limit 1000000) ..." from summary.
		summary = summary[:last]
	}

	var sb strings.Builder
	sb.WriteString(le.source)
	sb.WriteString(": ")
	sb.WriteString(le.Cause.Error())

	if len(summary) == 0 {
		return sb.String()
	}

	// Show one line, or two if the extra context aids understanding the error.
	keep := 1
	if len(summary) >= 2 && (includePreviousLine(summary[len(summary)-1]) || le.Truncated) {
		keep = 2
	}

	shown := summary[len(summary)-keep:]
	for _, line := range shown {
		sb.WriteString(": ")
		sb.WriteString(strings.TrimSpace(line))
	}

	// The count is relative to the full log, so a dropped "processed" line is
	// reported as omitted too.
	var notes []string
	if le.Truncated {
		notes = append(notes, "truncated")
	}
	if omitted := len(le.Log) - len(shown); omitted > 0 {
		notes = append(notes, fmt.Sprintf("%d line(s) omitted", omitted))
	}
	if len(notes) == 0 {
		return sb.String()
	}

	sb.WriteString(" (")
	sb.WriteString(strings.Join(notes, ", "))
	sb.WriteString(")")
	return sb.String()
}
// includePreviousLine reports whether line is likely easier to understand
// when the line preceding it in the log is shown as well.
//
// This is a deliberately simple prefix heuristic: good error messages matter,
// but reaching for regular expressions here would probably be overkill.
func includePreviousLine(line string) bool {
	switch {
	case strings.HasPrefix(line, "\t"):
		// [13] STRUCT drm_rect size=16 vlen=4
		// \tx1 type_id=2
		return true

	case len(line) >= 2 && line[0] == 'R' && '0' <= line[1] && line[1] <= '9':
		// 0: (95) exit
		// R0 !read_ok
		return true

	case strings.HasPrefix(line, "invalid bpf_context access"):
		// 0: (79) r6 = *(u64 *)(r1 +0)
		// func '__x64_sys_recvfrom' arg0 type FWD is not a struct
		// invalid bpf_context access off=0 size=8
		return true

	default:
		return false
	}
}
// Format the error.
//
// Understood verbs are %s and %v, which are equivalent to calling Error(). %v
// allows outputting additional information using the following flags:
//
// %+<width>v: Output the first <width> lines, or all lines if no width is given.
// %-<width>v: Output the last <width> lines, or all lines if no width is given.
//
// Use width to specify how many lines to output. Use the '-' flag to output
// lines from the end of the log instead of the beginning.
//
// A width without '+' or '-' prints "%!v(BADWIDTH)", combining both flags
// prints "%!v(BADFLAG)" and any other verb prints "%!<verb>(BADVERB)".
func (le *VerifierError) Format(f fmt.State, verb rune) {
switch verb {
case 's':
_, _ = io.WriteString(f, le.Error())
case 'v':
// n is the number of log lines to print: the requested width, capped at
// the length of the log, or the whole log if no width was given.
n, haveWidth := f.Width()
if !haveWidth || n > len(le.Log) {
n = len(le.Log)
}
// Without a flag %v behaves like %s. A width only makes sense together
// with a flag.
if !f.Flag('+') && !f.Flag('-') {
if haveWidth {
_, _ = io.WriteString(f, "%!v(BADWIDTH)")
return
}
_, _ = io.WriteString(f, le.Error())
return
}
if f.Flag('+') && f.Flag('-') {
_, _ = io.WriteString(f, "%!v(BADFLAG)")
return
}
fmt.Fprintf(f, "%s: %s:", le.source, le.Cause.Error())
omitted := len(le.Log) - n
lines := le.Log[:n]
if f.Flag('-') {
// Print last instead of first lines.
lines = le.Log[len(le.Log)-n:]
// The omitted lines precede the output, so the note goes first.
if omitted > 0 {
fmt.Fprintf(f, "\n\t(%d line(s) omitted)", omitted)
}
}
for _, line := range lines {
fmt.Fprintf(f, "\n\t%s", line)
}
// With '+' the omitted lines follow the output, so the note goes last.
if !f.Flag('-') {
if omitted > 0 {
fmt.Fprintf(f, "\n\t(%d line(s) omitted)", omitted)
}
}
if le.Truncated {
fmt.Fprintf(f, "\n\t(truncated)")
}
default:
fmt.Fprintf(f, "%%!%c(BADVERB)", verb)
}
}
@@ -0,0 +1,87 @@
package internal
import (
"errors"
"os"
"testing"
"github.com/cilium/ebpf/internal/unix"
qt "github.com/frankban/quicktest"
)
// TestVerifierErrorWhitespace checks that trailing whitespace and NUL bytes in
// the log do not show up in the error string, and that logs which are empty
// after cleaning add nothing to it.
func TestVerifierErrorWhitespace(t *testing.T) {
b := []byte("unreachable insn 28")
b = append(b,
0xa, // \n
0xd, // \r
0x9, // \t
0x20, // space
0, 0, // trailing NUL bytes
)
err := ErrorWithLog("frob", errors.New("test"), b, false)
qt.Assert(t, err.Error(), qt.Equals, "frob: test: unreachable insn 28")
// Each of these logs is empty once NUL bytes and whitespace are removed.
for _, log := range [][]byte{
nil,
[]byte("\x00"),
[]byte(" "),
} {
err = ErrorWithLog("frob", errors.New("test"), log, false)
qt.Assert(t, err.Error(), qt.Equals, "frob: test", qt.Commentf("empty log %q has incorrect format", log))
}
}
// TestVerifierErrorWrapping checks that the error returned by ErrorWithLog
// wraps the provided cause, carries the truncated flag and includes the log in
// its error string, for all combinations of empty/non-empty log and
// truncated/not truncated.
func TestVerifierErrorWrapping(t *testing.T) {
// Empty log, not truncated.
ve := ErrorWithLog("frob", unix.ENOENT, nil, false)
qt.Assert(t, ve, qt.ErrorIs, unix.ENOENT, qt.Commentf("should wrap provided error"))
qt.Assert(t, ve.Truncated, qt.IsFalse, qt.Commentf("verifier log should not be marked as truncated"))
// Empty log, truncated.
ve = ErrorWithLog("frob", unix.EINVAL, nil, true)
qt.Assert(t, ve, qt.ErrorIs, unix.EINVAL, qt.Commentf("should wrap provided error"))
qt.Assert(t, ve.Truncated, qt.IsTrue, qt.Commentf("verifier log should be marked as truncated"))
// Non-empty log, not truncated.
ve = ErrorWithLog("frob", unix.EINVAL, []byte("foo"), false)
qt.Assert(t, ve, qt.ErrorIs, unix.EINVAL, qt.Commentf("should wrap provided error"))
qt.Assert(t, ve.Error(), qt.Contains, "foo", qt.Commentf("verifier log should appear in error string"))
// Non-empty log, truncated.
ve = ErrorWithLog("frob", unix.ENOSPC, []byte("foo"), true)
qt.Assert(t, ve, qt.ErrorIs, unix.ENOSPC, qt.Commentf("should wrap provided error"))
qt.Assert(t, ve.Error(), qt.Contains, "foo", qt.Commentf("verifier log should appear in error string"))
qt.Assert(t, ve.Truncated, qt.IsTrue, qt.Commentf("verifier log should be marked truncated"))
}
// TestVerifierErrorSummary checks which log lines end up in the summary
// returned by VerifierError.Error, using verifier logs stored in testdata.
func TestVerifierErrorSummary(t *testing.T) {
// Suppress the last line containing 'processed ... insns'.
errno524 := readErrorFromFile(t, "testdata/errno524.log")
qt.Assert(t, errno524.Error(), qt.Contains, "JIT doesn't support bpf-to-bpf calls")
qt.Assert(t, errno524.Error(), qt.Not(qt.Contains), "processed 39 insns")
// Include the previous line if the current one starts with a tab.
invalidMember := readErrorFromFile(t, "testdata/invalid-member.log")
qt.Assert(t, invalidMember.Error(), qt.Contains, "STRUCT task_struct size=7744 vlen=218: cpus_mask type_id=109 bitfield_size=0 bits_offset=7744 Invalid member")
// Only include the last line.
issue43 := readErrorFromFile(t, "testdata/issue-43.log")
qt.Assert(t, issue43.Error(), qt.Contains, "[11] FUNC helper_func2 type_id=10 vlen != 0")
qt.Assert(t, issue43.Error(), qt.Not(qt.Contains), "[10] FUNC_PROTO (anon) return=3 args=(3 arg)")
// Include instruction that caused invalid register access.
invalidR0 := readErrorFromFile(t, "testdata/invalid-R0.log")
qt.Assert(t, invalidR0.Error(), qt.Contains, "0: (95) exit: R0 !read_ok")
// Include symbol that doesn't match context type.
invalidCtx := readErrorFromFile(t, "testdata/invalid-ctx-access.log")
qt.Assert(t, invalidCtx.Error(), qt.Contains, "func '__x64_sys_recvfrom' arg0 type FWD is not a struct: invalid bpf_context access off=0 size=8")
}
// readErrorFromFile reads file and turns its contents into a VerifierError
// with source "file", cause unix.EINVAL and the truncated flag unset.
//
// The test is aborted if the file can't be read.
func readErrorFromFile(tb testing.TB, file string) *VerifierError {
tb.Helper()
contents, err := os.ReadFile(file)
if err != nil {
tb.Fatal("Read file:", err)
}
return ErrorWithLog("file", unix.EINVAL, contents, false)
}
@@ -0,0 +1,184 @@
package internal
import (
"errors"
"fmt"
"sync"
)
// ErrNotSupported indicates that a feature is not supported by the current kernel.
var ErrNotSupported = errors.New("not supported")
// UnsupportedFeatureError is returned by FeatureTest() functions.
//
// It matches ErrNotSupported when compared using errors.Is.
type UnsupportedFeatureError struct {
// The minimum Linux mainline version required for this feature.
// Used for the error string, and for sanity checking during testing.
MinimumVersion Version
// The name of the feature that isn't supported.
Name string
}
// Error names the unsupported feature and, if MinimumVersion is specified,
// the version required for it.
func (ufe *UnsupportedFeatureError) Error() string {
	msg := fmt.Sprintf("%s not supported", ufe.Name)
	if !ufe.MinimumVersion.Unspecified() {
		msg += fmt.Sprintf(" (requires >= %s)", ufe.MinimumVersion)
	}
	return msg
}
// Is indicates that UnsupportedFeatureError is ErrNotSupported.
//
// It makes errors.Is(err, ErrNotSupported) succeed for any
// *UnsupportedFeatureError.
func (ufe *UnsupportedFeatureError) Is(target error) bool {
return target == ErrNotSupported
}
// FeatureTest caches the result of a [FeatureTestFn].
//
// Fields should not be modified after creation.
type FeatureTest struct {
// The name of the feature being detected.
Name string
// Version in the form Major.Minor[.Patch].
Version string
// The feature test itself.
Fn FeatureTestFn
// mu protects done and result.
mu sync.RWMutex
// done is set once Fn has produced a conclusive result.
done bool
// result is the cached outcome. It is only meaningful if done is true.
result error
}
// FeatureTestFn is used to determine whether the kernel supports
// a certain feature.
//
// The return values have the following semantics:
//
//	err == nil: the feature is available
//	err matches ErrNotSupported: the feature is not available
//	any other error: the test couldn't be executed
type FeatureTestFn func() error
// NewFeatureTest wraps fn in a single [FeatureTest] and returns the function
// which executes it.
//
// name and version become the Name and Version of the FeatureTest.
func NewFeatureTest(name, version string, fn FeatureTestFn) func() error {
	return (&FeatureTest{Name: name, Version: version, Fn: fn}).execute
}
// execute runs the feature test unless a conclusive result is already cached.
//
// A result is conclusive, and therefore cached, if Fn returns nil or an error
// matching ErrNotSupported. The latter is converted into an
// *UnsupportedFeatureError carrying the parsed Version. All other errors are
// wrapped and returned without being cached.
//
// See [FeatureTestFn] for the meaning of the returned error.
func (ft *FeatureTest) execute() error {
	ft.mu.RLock()
	cached, known := ft.result, ft.done
	ft.mu.RUnlock()
	if known {
		return cached
	}

	ft.mu.Lock()
	defer ft.mu.Unlock()

	// Another caller may have completed the test while we were waiting for
	// the write lock.
	if ft.done {
		return ft.result
	}

	err := ft.Fn()
	switch {
	case err == nil:
		ft.done = true
		return nil

	case errors.Is(err, ErrNotSupported):
		var minimum Version
		if ft.Version != "" {
			parsed, verr := NewVersion(ft.Version)
			if verr != nil {
				return fmt.Errorf("feature %s: %w", ft.Name, verr)
			}
			minimum = parsed
		}

		ft.done = true
		ft.result = &UnsupportedFeatureError{
			MinimumVersion: minimum,
			Name:           ft.Name,
		}
		return ft.result

	default:
		// The test didn't get far enough to make a determination, so there
		// is nothing worth caching.
		return fmt.Errorf("detect support for %s: %w", ft.Name, err)
	}
}
// FeatureMatrix is a map of related feature tests, indexed by a key.
//
// Useful when there is a small number of discrete features which are known
// at compile time.
//
// It must not be modified concurrently with calling [FeatureMatrix.Result].
type FeatureMatrix[K comparable] map[K]*FeatureTest

// Result executes the feature test stored under key and returns its outcome.
// A key without a feature test yields an error.
//
// It's safe to call this function concurrently.
func (fm FeatureMatrix[K]) Result(key K) error {
	if ft, found := fm[key]; found {
		return ft.execute()
	}

	return fmt.Errorf("no feature probe for %v", key)
}
// FeatureCache caches a potentially unlimited number of feature probes.
//
// Useful when there is a high cardinality for a feature test.
type FeatureCache[K comparable] struct {
// mu protects features.
mu sync.RWMutex
// newTest creates the feature test for a key which isn't in features yet.
newTest func(K) *FeatureTest
// features holds the feature tests created so far.
features map[K]*FeatureTest
}
// NewFeatureCache creates an empty FeatureCache.
//
// newTest is invoked to create the feature test for a key the first time
// that key is requested.
func NewFeatureCache[K comparable](newTest func(K) *FeatureTest) *FeatureCache[K] {
return &FeatureCache[K]{
newTest: newTest,
features: make(map[K]*FeatureTest),
}
}
// Result returns the outcome of the feature test for key, creating the test
// first if necessary.
func (fc *FeatureCache[K]) Result(key K) error {
// NB: Executing the feature test happens without fc.mu taken.
return fc.retrieve(key).execute()
}
// retrieve returns the feature test for key, creating and storing it via
// newTest if it doesn't exist yet.
//
// Lookups of existing tests only take the read lock. The map is checked a
// second time under the write lock before a new test is created.
func (fc *FeatureCache[K]) retrieve(key K) *FeatureTest {
	fc.mu.RLock()
	existing := fc.features[key]
	fc.mu.RUnlock()

	if existing != nil {
		return existing
	}

	fc.mu.Lock()
	defer fc.mu.Unlock()

	// Somebody else may have added the test between the two locks.
	if raced := fc.features[key]; raced != nil {
		return raced
	}

	created := fc.newTest(key)
	fc.features[key] = created
	return created
}
@@ -0,0 +1,71 @@
package internal
import (
"errors"
"strings"
"testing"
"github.com/cilium/ebpf/internal/testutils/fdtrace"
)
// TestMain hands control of the package's test run to fdtrace.TestMain.
func TestMain(m *testing.M) {
fdtrace.TestMain(m)
}
// TestFeatureTest checks the three outcomes of a feature test: success,
// ErrNotSupported (converted to a cached *UnsupportedFeatureError) and any
// other error (returned without being cached).
func TestFeatureTest(t *testing.T) {
var called bool
fn := NewFeatureTest("foo", "1.0", func() error {
called = true
return nil
})
// Creating the feature test must not execute it.
if called {
t.Error("Function was called too early")
}
err := fn()
if !called {
t.Error("Function wasn't called")
}
if err != nil {
t.Error("Unexpected negative result:", err)
}
fn = NewFeatureTest("bar", "2.1.1", func() error {
return ErrNotSupported
})
err = fn()
if err == nil {
t.Fatal("Unexpected positive result")
}
fte, ok := err.(*UnsupportedFeatureError)
if !ok {
t.Fatal("Result is not a *UnsupportedFeatureError")
}
if !strings.Contains(fte.Error(), "2.1.1") {
t.Error("UnsupportedFeatureError.Error doesn't contain version")
}
if !errors.Is(err, ErrNotSupported) {
t.Error("UnsupportedFeatureError is not ErrNotSupported")
}
// A cached result is returned as the very same error value.
err2 := fn()
if err != err2 {
t.Error("Didn't cache an error wrapping ErrNotSupported")
}
fn = NewFeatureTest("bar", "2.1.1", func() error {
return errors.New("foo")
})
// Inconclusive results are wrapped anew on every call, so the two error
// values must differ.
err1, err2 := fn(), fn()
if err1 == err2 {
t.Error("Cached result of unsuccessful execution")
}
}
@@ -0,0 +1,128 @@
package internal
import (
"bufio"
"bytes"
"compress/gzip"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"sync"
)
// NewBufferedSectionReader returns a buffered reader for the n bytes of ra
// starting at offset off. It is a convenience function for reading
// subsections of ELF sections while minimizing the amount of read() syscalls
// made.
//
// Syscall overhead is non-negligible in continuous integration context
// where ELFs might be accessed over virtual filesystems with poor random
// access performance. Buffering reads makes sense because (sub)sections
// end up being read completely anyway.
//
// Use instead of the r.Seek() + io.LimitReader() pattern.
func NewBufferedSectionReader(ra io.ReaderAt, off, n int64) *bufio.Reader {
	// Request a buffer as large as the section, but never larger than one
	// page: this avoids slurping large parts of a file into memory while
	// still letting arches with big pages buffer more than bufio's hardcoded
	// default of 4096. Small sections get a correspondingly small buffer.
	size := int64(os.Getpagesize())
	if n <= size {
		size = n
	}

	section := io.NewSectionReader(ra, off, n)
	return bufio.NewReaderSize(section, int(size))
}
// DiscardZeroes is an io.Writer which throws away everything written to it,
// but only accepts zero bytes.
type DiscardZeroes struct{}

// Write returns len(p) if p consists solely of zero bytes. Otherwise it
// reports zero bytes written together with an error.
func (DiscardZeroes) Write(p []byte) (int, error) {
	for i := 0; i < len(p); i++ {
		if p[i] == 0 {
			continue
		}
		return 0, errors.New("encountered non-zero byte")
	}

	return len(p), nil
}
// ReadAllCompressed opens the gzip compressed file at the given path and
// returns its decompressed contents.
//
// An error is returned if the file can't be opened, doesn't start with a
// valid gzip header or can't be decompressed completely.
func ReadAllCompressed(file string) ([]byte, error) {
	compressed, err := os.Open(file)
	if err != nil {
		return nil, err
	}
	defer compressed.Close()

	decompressor, err := gzip.NewReader(compressed)
	if err != nil {
		return nil, err
	}
	defer decompressor.Close()

	return io.ReadAll(decompressor)
}
// ReadUint64FromFile reads a single uint64 from the file located at the
// joined path elements.
//
// format specifies the contents of the file in fmt.Scanf syntax. It has to
// scan exactly one item.
func ReadUint64FromFile(format string, path ...string) (uint64, error) {
	name := filepath.Join(path...)

	contents, err := os.ReadFile(name)
	if err != nil {
		return 0, fmt.Errorf("reading file %q: %w", name, err)
	}

	var parsed uint64
	items, err := fmt.Fscanf(bytes.NewReader(contents), format, &parsed)
	switch {
	case err != nil:
		return 0, fmt.Errorf("parsing file %q: %w", name, err)
	case items != 1:
		return 0, fmt.Errorf("parsing file %q: expected 1 item, got %d", name, items)
	}

	return parsed, nil
}
// uint64FromFileKey identifies a memoized value by the scan format and the
// joined file path it was read with.
type uint64FromFileKey struct {
format, path string
}
// uint64FromFileCache holds the values memoized by ReadUint64FromFileOnce.
// The embedded RWMutex protects values.
var uint64FromFileCache = struct {
sync.RWMutex
values map[uint64FromFileKey]uint64
}{
values: map[uint64FromFileKey]uint64{},
}
// ReadUint64FromFileOnce is like [ReadUint64FromFile] but memoizes the result
// per combination of format and joined path.
//
// Errors are not memoized: a failed read is attempted again by the next call.
func ReadUint64FromFileOnce(format string, path ...string) (uint64, error) {
	filename := filepath.Join(path...)
	key := uint64FromFileKey{format: format, path: filename}

	uint64FromFileCache.RLock()
	cached, hit := uint64FromFileCache.values[key]
	uint64FromFileCache.RUnlock()
	if hit {
		return cached, nil
	}

	// The file is read without holding the lock.
	fresh, err := ReadUint64FromFile(format, filename)
	if err != nil {
		return 0, err
	}

	uint64FromFileCache.Lock()
	defer uint64FromFileCache.Unlock()

	if winner, ok := uint64FromFileCache.values[key]; ok {
		// Someone else got here before us, use what is cached.
		return winner, nil
	}

	uint64FromFileCache.values[key] = fresh
	return fresh, nil
}
@@ -0,0 +1,19 @@
package internal
import (
"bytes"
"io"
"testing"
)
// TestDiscardZero checks that DiscardZeroes accepts input consisting of zero
// bytes and rejects input containing a non-zero byte.
func TestDiscardZero(t *testing.T) {
_, err := io.Copy(DiscardZeroes{}, bytes.NewReader([]byte{0, 0, 0}))
if err != nil {
t.Error("Returned an error even though input was zero:", err)
}
_, err = io.Copy(DiscardZeroes{}, bytes.NewReader([]byte{1}))
if err == nil {
t.Error("No error even though input is non-zero")
}
}
@@ -0,0 +1,267 @@
package kconfig
import (
"bufio"
"bytes"
"compress/gzip"
"fmt"
"io"
"math"
"os"
"strconv"
"strings"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
)
// Find opens the kconfig file of the host.
//
// It first tries /boot/config-<release>, where <release> is the release of
// the currently running kernel, and falls back to /proc/config.gz.
// If neither file can be opened an error is returned. The caller is
// responsible for closing the returned file.
func Find() (*os.File, error) {
	kernelRelease, err := internal.KernelRelease()
	if err != nil {
		return nil, fmt.Errorf("cannot get kernel release: %w", err)
	}

	path := "/boot/config-" + kernelRelease
	for _, candidate := range []string{path, "/proc/config.gz"} {
		if f, openErr := os.Open(candidate); openErr == nil {
			return f, nil
		}
	}

	return nil, fmt.Errorf("neither %s nor /proc/config.gz provide a kconfig", path)
}
// Parse parses the kconfig file for which a reader is given.
//
// All the CONFIG_* which are in filter and which are set will be put in the
// returned map as key with their corresponding value as map value.
// If filter is nil, no filtering will occur.
// If the kconfig file is not valid, an error will be returned.
//
// A gzip compressed source is detected and decompressed transparently.
func Parse(source io.ReaderAt, filter map[string]struct{}) (map[string]string, error) {
	var input io.Reader

	gz, err := gzip.NewReader(io.NewSectionReader(source, 0, math.MaxInt64))
	if err == nil {
		// Source is gzip compressed, transparently decompress.
		input = gz
	} else {
		// Not gzip: start over from the beginning and read it as is.
		input = io.NewSectionReader(source, 0, math.MaxInt64)
	}

	config := make(map[string]string, len(filter))

	lines := bufio.NewScanner(input)
	for lines.Scan() {
		if lineErr := processKconfigLine(lines.Bytes(), config, filter); lineErr != nil {
			return nil, fmt.Errorf("cannot parse line: %w", lineErr)
		}

		// Stop reading as soon as every requested key has been found.
		if filter != nil && len(config) == len(filter) {
			break
		}
	}

	if scanErr := lines.Err(); scanErr != nil {
		return nil, fmt.Errorf("cannot parse: %w", scanErr)
	}

	if gz == nil {
		return config, nil
	}

	return config, gz.Close()
}
// processKconfigLine parses a single line of a kconfig file and stores the
// key/value pair in m.
//
// Lines which don't start with "CONFIG_" are skipped, as are keys which are
// missing from a non-nil filter. A key which is already present in m is left
// untouched. Lines without '=' or without a value are rejected.
//
// Golang translation of libbpf bpf_object__process_kconfig_line():
// https://github.com/libbpf/libbpf/blob/fbd60dbff51c870f5e80a17c4f2fd639eb80af90/src/libbpf.c#L1874
// It does the same checks but does not put the data inside the BPF map.
func processKconfigLine(line []byte, m map[string]string, filter map[string]struct{}) error {
	// Ignore empty lines and "# CONFIG_* is not set".
	if !bytes.HasPrefix(line, []byte("CONFIG_")) {
		return nil
	}

	sep := bytes.IndexByte(line, '=')
	if sep < 0 {
		return fmt.Errorf("line %q does not contain separator '='", line)
	}

	key, value := line[:sep], line[sep+1:]
	if len(value) == 0 {
		return fmt.Errorf("line %q has no value", line)
	}

	// NB: map[string(key)] gets special optimisation help from the compiler
	// and doesn't allocate. Don't turn this into a variable.
	if filter != nil {
		if _, wanted := filter[string(key)]; !wanted {
			return nil
		}
	}

	// This can seem odd, but libbpf only sets the value the first time the key is
	// met:
	// https://github.com/torvalds/linux/blob/0d85b27b0cc6/tools/lib/bpf/libbpf.c#L1906-L1908
	if _, seen := m[string(key)]; seen {
		return nil
	}

	m[string(key)] = string(value)
	return nil
}
// PutValue encodes the kconfig value according to the BTF type and writes the
// result to data.
//
// The encoding is chosen from the value after resolving typ to its underlying
// type: "y", "n" and "m" are tristates, values starting with a double quote
// are strings and everything else is treated as a number.
func PutValue(data []byte, typ btf.Type, value string) error {
	typ = btf.UnderlyingType(typ)

	if value == "y" || value == "n" || value == "m" {
		return putValueTri(data, typ, value)
	}

	if strings.HasPrefix(value, `"`) {
		return putValueString(data, typ, value)
	}

	return putValueNumber(data, typ, value)
}
// Golang translation of libbpf_tristate enum:
// https://github.com/libbpf/libbpf/blob/fbd60dbff51c870f5e80a17c4f2fd639eb80af90/src/bpf_helpers.h#L169
type triState int
// putValueTri maps the kconfig value "n" to TriNo, "y" to TriYes and "m" to
// TriModule.
const (
TriNo triState = 0
TriYes triState = 1
TriModule triState = 2
)
// putValueTri writes a tristate kconfig value ("y", "n" or "m") to data.
//
// typ must either be a one byte btf.Bool, which accepts "y" and "n" and is
// stored in data[0], or the libbpf_tristate enum, which accepts all three
// values and is stored as a 64 bit integer in native byte order.
//
// data is not length-checked here and must be large enough for the write.
func putValueTri(data []byte, typ btf.Type, value string) error {
	switch v := typ.(type) {
	case *btf.Int:
		if v.Encoding != btf.Bool {
			return fmt.Errorf("cannot add tri value, expected btf.Bool, got: %v", v.Encoding)
		}

		if v.Size != 1 {
			return fmt.Errorf("cannot add tri value, expected size of 1 byte, got: %d", v.Size)
		}

		switch value {
		case "y":
			data[0] = 1
		case "n":
			data[0] = 0
		default:
			return fmt.Errorf("cannot use %q for btf.Bool", value)
		}

	case *btf.Enum:
		if v.Name != "libbpf_tristate" {
			return fmt.Errorf("cannot use enum %q, only libbpf_tristate is supported", v.Name)
		}

		var tri triState
		switch value {
		case "y":
			tri = TriYes
		case "m":
			tri = TriModule
		case "n":
			tri = TriNo
		default:
			return fmt.Errorf("value %q is not supported for libbpf_tristate", value)
		}

		internal.NativeEndian.PutUint64(data, uint64(tri))

	default:
		// This used to claim a "number value", copied from putValueNumber.
		return fmt.Errorf("cannot add tri value, expected btf.Int or btf.Enum, got: %T", v)
	}

	return nil
}
// putValueString writes a double-quoted kconfig string value to data.
//
// typ must be an array whose element type resolves to a one byte btf.Int
// which is not a btf.Bool. value must start and end with a double quote;
// exactly one quote is removed from each end so that quotes which belong to
// the contents survive. Contents longer than the array are cut off after
// array.Nelems bytes.
func putValueString(data []byte, typ btf.Type, value string) error {
	array, ok := typ.(*btf.Array)
	if !ok {
		// array is nil after a failed assertion, report the real type instead.
		return fmt.Errorf("cannot add string value, expected btf.Array, got %T", typ)
	}

	contentType, ok := btf.UnderlyingType(array.Type).(*btf.Int)
	if !ok {
		// Same here: contentType is nil, so describe the actual element type.
		return fmt.Errorf("cannot add string value, expected array of btf.Int, got array of %T", array.Type)
	}

	// Any Int, which is not bool, of one byte could be used to store char:
	// https://github.com/torvalds/linux/blob/1a5304fecee5/tools/lib/bpf/libbpf.c#L3637-L3638
	//
	// Both conditions have to hold, so each violation is rejected on its own.
	if contentType.Encoding == btf.Bool {
		return fmt.Errorf("cannot add string value, expected array of btf.Int which is not btf.Bool")
	}

	if contentType.Size != 1 {
		return fmt.Errorf("cannot add string value, expected array of btf.Int of size 1, got array of btf.Int of size: %v", contentType.Size)
	}

	// The length check rejects a lone '"', which is its own prefix and suffix.
	if len(value) < 2 || !strings.HasPrefix(value, `"`) || !strings.HasSuffix(value, `"`) {
		return fmt.Errorf(`value %q must start and finish with '"'`, value)
	}

	// Strip only the enclosing pair of quotes.
	str := value[1 : len(value)-1]

	// We need to trim string if the bpf array is smaller.
	if uint32(len(str)) >= array.Nelems {
		str = str[:array.Nelems]
	}

	// Write the string content to .kconfig.
	copy(data, str)

	return nil
}
// putValueNumber parses value as an integer and writes it to data in native
// byte order.
//
// typ must be a btf.Int of size 1, 2, 4 or 8. value is parsed as a signed
// integer if the encoding is btf.Signed and as an unsigned integer otherwise.
// The base is derived from the prefix of value and the result has to fit into
// the size of the type.
//
// data is not length-checked here and must be large enough for the write.
func putValueNumber(data []byte, typ btf.Type, value string) error {
	integer, ok := typ.(*btf.Int)
	if !ok {
		// integer is nil after a failed assertion, report the real type instead.
		return fmt.Errorf("cannot add number value, expected *btf.Int, got: %T", typ)
	}

	size := integer.Size
	sizeInBits := size * 8

	var n uint64
	var err error
	if integer.Encoding == btf.Signed {
		var parsed int64
		parsed, err = strconv.ParseInt(value, 0, int(sizeInBits))
		// Keep the two's complement bit pattern, it is truncated to size below.
		n = uint64(parsed)
	} else {
		n, err = strconv.ParseUint(value, 0, int(sizeInBits))
	}

	if err != nil {
		return fmt.Errorf("cannot parse value: %w", err)
	}

	switch size {
	case 1:
		data[0] = byte(n)
	case 2:
		internal.NativeEndian.PutUint16(data, uint16(n))
	case 4:
		internal.NativeEndian.PutUint32(data, uint32(n))
	case 8:
		internal.NativeEndian.PutUint64(data, n)
	default:
		return fmt.Errorf("size (%d) is not valid, expected: 1, 2, 4 or 8", size)
	}

	return nil
}
@@ -0,0 +1,418 @@
package kconfig
import (
"bytes"
"encoding/binary"
"os"
"testing"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
qt "github.com/frankban/quicktest"
)
// BenchmarkParse measures parsing a complete gzip compressed kconfig without
// a filter.
func BenchmarkParse(b *testing.B) {
f, err := os.Open("testdata/config-6.2.15-300.fc38.x86_64.gz")
if err != nil {
b.Fatal(err)
}
defer f.Close()
b.ReportAllocs()
b.ResetTimer()
// The same file is handed to Parse on every iteration. Parse takes an
// io.ReaderAt and reads from offset 0.
for n := 0; n < b.N; n++ {
_, err := Parse(f, nil)
if err != nil {
b.Fatal(err)
}
}
}
// BenchmarkParseFiltered measures parsing a gzip compressed kconfig with a
// filter containing a single key.
func BenchmarkParseFiltered(b *testing.B) {
f, err := os.Open("testdata/config-6.2.15-300.fc38.x86_64.gz")
if err != nil {
b.Fatal(err)
}
defer f.Close()
b.ReportAllocs()
b.ResetTimer()
// CONFIG_ARCH_USE_MEMTEST is the last CONFIG_ in the file.
// So, we will easily be able to see how many allocated bytes the filtering
// permits reducing compared to unfiltered benchmark.
filter := map[string]struct{}{"CONFIG_ARCH_USE_MEMTEST": {}}
for n := 0; n < b.N; n++ {
_, err := Parse(f, filter)
if err != nil {
b.Fatal(err)
}
}
}
// TestParse parses the plain text fixture testdata/test.kconfig without a
// filter and compares the result with the expected key/value pairs.
func TestParse(t *testing.T) {
t.Parallel()
f, err := os.Open("testdata/test.kconfig")
if err != nil {
t.Fatal("Error reading /testdata/test.kconfig: ", err)
}
defer f.Close()
config, err := Parse(f, nil)
if err != nil {
t.Fatal("Error parsing kconfig: ", err)
}
// String values keep their surrounding quotes. CONFIG_FOO appears twice in
// the fixture and the first occurrence is expected to win.
expected := map[string]string{
"CONFIG_TRISTATE": "m",
"CONFIG_BOOL": "y",
"CONFIG_CHAR": "100",
"CONFIG_USHORT": "30000",
"CONFIG_INT": "123456",
"CONFIG_ULONG": "0xDEADBEEFC0DE",
"CONFIG_STR": `"abracad"`,
"CONFIG_FOO": `"foo"`,
}
qt.Assert(t, config, qt.DeepEquals, expected)
}
// TestParseFiltered parses the plain text fixture testdata/test.kconfig with
// a filter and checks that only the requested key is returned.
func TestParseFiltered(t *testing.T) {
	t.Parallel()

	f, err := os.Open("testdata/test.kconfig")
	if err != nil {
		t.Fatal("Error reading /testdata/test.kconfig: ", err)
	}
	defer f.Close()

	filter := map[string]struct{}{"CONFIG_FOO": {}}

	config, err := Parse(f, filter)
	if err != nil {
		// The fixture is plain text, so don't blame gzip in the message.
		t.Fatal("Error parsing kconfig: ", err)
	}

	expected := map[string]string{"CONFIG_FOO": `"foo"`}
	qt.Assert(t, config, qt.DeepEquals, expected)
}
// TestParseGzipped checks that a gzip compressed kconfig is parsed without
// error. The parsed contents are not inspected.
func TestParseGzipped(t *testing.T) {
t.Parallel()
f, err := os.Open("testdata/config-6.2.15-300.fc38.x86_64.gz")
if err != nil {
t.Fatal("Error reading /testdata/config-6.2.15-300.fc38.x86_64.gz: ", err)
}
defer f.Close()
_, err = Parse(f, nil)
if err != nil {
t.Fatal("Error parsing gzipped kconfig: ", err)
}
}
// TestParseGzippedFiltered parses a gzip compressed kconfig with a filter and
// checks that only the requested key is returned.
func TestParseGzippedFiltered(t *testing.T) {
t.Parallel()
f, err := os.Open("testdata/config-6.2.15-300.fc38.x86_64.gz")
if err != nil {
t.Fatal("Error reading /testdata/config-6.2.15-300.fc38.x86_64.gz: ", err)
}
defer f.Close()
filter := map[string]struct{}{"CONFIG_HZ": {}}
config, err := Parse(f, filter)
if err != nil {
t.Fatal("Error parsing gzipped kconfig: ", err)
}
expected := map[string]string{"CONFIG_HZ": "1000"}
qt.Assert(t, config, qt.DeepEquals, expected)
}
// TestProcessKconfigBadLine checks that processKconfigLine rejects CONFIG_
// lines which lack the '=' separator or a value.
func TestProcessKconfigBadLine(t *testing.T) {
t.Parallel()
m := make(map[string]string)
err := processKconfigLine([]byte("CONFIG_FOO"), m, nil)
qt.Assert(t, err, qt.IsNotNil, qt.Commentf("line has no '='"))
err = processKconfigLine([]byte("CONFIG_FOO="), m, nil)
qt.Assert(t, err, qt.IsNotNil, qt.Commentf("line has no value"))
}
// TestPutValue is a table driven test for PutValue.
//
// Cases with a non-empty comment are expected to fail, the comment describes
// why. All other cases are expected to succeed and to produce the same bytes
// as encoding expected in native byte order.
func TestPutValue(t *testing.T) {
t.Parallel()
type testCase struct {
// typ is the BTF type the value is encoded for.
typ btf.Type
// value is the kconfig value as it appears in the file.
value string
// expected is the Go value whose binary encoding PutValue has to produce.
expected any
// comment is the reason why an error is expected, if any.
comment string
}
cases := []testCase{
{
typ: &btf.Int{
Size: 1,
Encoding: btf.Bool,
},
value: "n",
expected: int8(0),
},
{
typ: &btf.Int{
Size: 1,
Encoding: btf.Bool,
},
value: "y",
expected: int8(1),
},
{
typ: &btf.Int{
Size: 1,
Encoding: btf.Bool,
},
value: "foo",
comment: "Bad value",
},
{
typ: &btf.Int{},
comment: "Encoding is not Bool",
},
{
typ: &btf.Int{
Encoding: btf.Bool,
},
comment: "Size is not 1",
},
{
typ: &btf.Enum{
Name: "libbpf_tristate",
},
value: "y",
expected: int64(TriYes),
},
{
typ: &btf.Enum{
Name: "libbpf_tristate",
},
value: "n",
expected: int64(TriNo),
},
{
typ: &btf.Enum{
Name: "libbpf_tristate",
},
value: "m",
expected: int64(TriModule),
},
{
typ: &btf.Enum{
Name: "libbpf_tristate",
},
value: "foo",
comment: "Bad value",
},
{
typ: &btf.Enum{
Name: "error",
},
comment: "Enum name is wrong",
},
{
typ: &btf.Array{},
value: "y",
comment: "Type is not btf.Int",
},
{
typ: &btf.Int{
Size: 1,
},
value: "255",
expected: uint8(255),
},
{
typ: &btf.Int{
Size: 2,
},
value: "0xcafe",
expected: uint16(0xcafe),
},
{
typ: &btf.Int{
Size: 2,
},
value: "0755",
expected: uint16(0755),
},
{
typ: &btf.Int{
Size: 4,
Encoding: btf.Signed,
},
value: "-2147483648",
expected: int32(-2147483648),
},
{
typ: &btf.Int{
Size: 4,
Encoding: btf.Signed,
},
value: "+2147483647",
expected: int32(+2147483647),
},
{
typ: &btf.Int{
Size: 4,
},
value: "0xcafec0de",
expected: uint32(0xcafec0de),
},
{
typ: &btf.Int{
Size: 8,
Encoding: btf.Signed,
},
value: "+1000000000000",
expected: int64(1000000000000),
},
{
typ: &btf.Int{
Size: 8,
},
value: "1000000000000",
expected: uint64(1000000000000),
},
{
typ: &btf.Int{
Size: 1,
},
value: "foo",
comment: "Value is not an int",
},
{
typ: &btf.Array{},
value: "1",
comment: "Type is not btf.Int",
},
{
typ: &btf.Int{
Size: 16,
},
value: "1",
comment: "Size is wrong",
},
{
typ: &btf.Typedef{
Type: &btf.Int{
Size: 1,
},
},
value: "1",
expected: uint8(1),
},
{
typ: &btf.Array{
Type: &btf.Int{
Size: 1,
Encoding: btf.Char,
},
Nelems: 6,
},
value: `"foobar"`,
expected: []byte("foobar"),
},
{
typ: &btf.Array{
Type: &btf.Int{
Size: 1,
Encoding: btf.Unsigned,
},
Nelems: 3,
},
value: `"foobar"`,
expected: []byte("foo"),
},
{
typ: &btf.Array{
Type: &btf.Int{
Size: 1,
Encoding: btf.Signed,
},
Nelems: 2,
},
value: `"42"`,
expected: []byte("42"),
},
{
typ: &btf.Int{},
value: `"foo"`,
comment: "Type is not btf.Array",
},
{
typ: &btf.Array{},
value: `"foo"`,
comment: "Type is not btf.Array of btf.Int",
},
{
typ: &btf.Array{
Type: &btf.Int{
Size: 1,
Encoding: btf.Bool,
},
},
comment: "Type is not btf.Array of btf.Int of size 1 which is not btf.Bool",
},
{
typ: &btf.Array{
Type: &btf.Int{
Size: 4,
Encoding: btf.Char,
},
},
value: `"foo"`,
comment: "Type is not btf.Array of btf.Char of size 1",
},
{
typ: &btf.Array{
Type: &btf.Int{
Size: 1,
Encoding: btf.Char,
},
},
value: `"foo`,
comment: `Value does not start and end with '"'`,
},
}
for _, c := range cases {
// Error cases are given an empty buffer: PutValue has to fail before it
// writes anything.
if len(c.comment) > 0 {
err := PutValue(make([]byte, 0), c.typ, c.value)
qt.Assert(t, err, qt.IsNotNil, qt.Commentf(c.comment))
continue
}
// Derive the expected bytes, and thereby the size of the output buffer,
// from the native endian encoding of the expected Go value.
var buf bytes.Buffer
err := binary.Write(&buf, internal.NativeEndian, c.expected)
if err != nil {
t.Fatal(err)
}
expected := buf.Bytes()
data := make([]byte, len(expected))
err = PutValue(data, c.typ, c.value)
qt.Assert(t, err, qt.IsNil)
qt.Assert(t, data, qt.DeepEquals, expected)
}
}
@@ -0,0 +1,11 @@
CONFIG_TRISTATE=m
# CONFIG_IS_NOT_SET is not set
CONFIG_BOOL=y
CONFIG_CHAR=100
CONFIG_USHORT=30000
CONFIG_INT=123456
CONFIG_ULONG=0xDEADBEEFC0DE
CONFIG_STR="abracad"
CONFIG_FOO="foo"
CONFIG_FOO="bar"
@@ -0,0 +1,26 @@
package internal
import (
"sync"
)
// memoizedFunc stores the outcome of a single invocation of fn.
type memoizedFunc[T any] struct {
	once   sync.Once
	fn     func() (T, error)
	result T
	err    error
}

// run invokes fn and records both of its return values.
func (mf *memoizedFunc[T]) run() {
	mf.result, mf.err = mf.fn()
}

// do returns the recorded outcome of fn, invoking fn first if that hasn't
// happened yet.
func (mf *memoizedFunc[T]) do() (T, error) {
	mf.once.Do(mf.run)
	return mf.result, mf.err
}

// Memoize returns a function which yields the result of the first call to fn.
//
// fn is only ever called once, even if it returns an error.
func Memoize[T any](fn func() (T, error)) func() (T, error) {
	mf := &memoizedFunc[T]{fn: fn}
	return mf.do
}
@@ -0,0 +1,97 @@
package internal
import (
"bytes"
"errors"
"go/format"
"go/scanner"
"io"
"reflect"
"strings"
"unicode"
)
// Identifier converts a C style type or field name into an exportable Go
// identifier.
//
// Runes which are neither letters, digits nor underscores are dropped.
// An underscore is dropped if it is the first rune to be kept or if it
// follows a digit or a lower case letter. A letter which follows a dropped
// underscore, or which is the first rune to be kept, is upper cased.
func Identifier(str string) string {
	// last is the rune emitted most recently. It is -1 at the beginning of
	// the string and after an underscore has been dropped.
	last := rune(-1)

	convert := func(cur rune) rune {
		// See https://golang.org/ref/spec#Identifiers
		if unicode.IsLetter(cur) {
			if last == -1 {
				cur = unicode.ToUpper(cur)
			}
		} else if cur == '_' {
			afterLower := unicode.IsLetter(last) && unicode.IsLower(last)
			if last == -1 || unicode.IsDigit(last) || afterLower {
				// Drop the underscore and force the next letter to be
				// upper cased.
				cur = -1
			}
		} else if !unicode.IsDigit(cur) {
			// Drop the rune without touching last.
			return -1
		}

		last = cur
		return cur
	}

	return strings.Map(convert, str)
}
// WriteFormatted runs src through go/format and writes the result to out.
//
// If formatting fails with a scanner.ErrorList, each error that has a valid
// position is extended with the remainder of the source line it points at.
// Any other formatting error is returned unchanged.
func WriteFormatted(src []byte, out io.Writer) error {
	formatted, fmtErr := format.Source(src)
	if fmtErr == nil {
		_, writeErr := out.Write(formatted)
		return writeErr
	}

	var parseErrs scanner.ErrorList
	if !errors.As(fmtErr, &parseErrs) {
		return fmtErr
	}

	var annotated scanner.ErrorList
	for _, e := range parseErrs {
		if e.Pos.IsValid() {
			rest := src[e.Pos.Offset:]
			// Only annotate when the offending line is newline terminated.
			if end := bytes.IndexRune(rest, '\n'); end != -1 {
				e.Msg += ": " + string(rest[:end])
			}
		}
		annotated = append(annotated, e)
	}

	return annotated
}
// GoTypeName is like %T, but elides the package name.
//
// Pointers to a type are peeled off. A nil interface value yields "<nil>",
// which is what %T prints for it.
func GoTypeName(t any) string {
	rT := reflect.TypeOf(t)
	if rT == nil {
		// reflect.TypeOf returns nil for a nil interface; calling Kind on it
		// would panic.
		return "<nil>"
	}
	for rT.Kind() == reflect.Pointer {
		rT = rT.Elem()
	}
	// Doesn't return the correct Name for generic types due to https://github.com/golang/go/issues/55924
	return rT.Name()
}
@@ -0,0 +1,41 @@
package internal
import (
"testing"
qt "github.com/frankban/quicktest"
)
// TestIdentifier checks Identifier against a table of C style names.
func TestIdentifier(t *testing.T) {
	for _, tc := range []struct{ in, out string }{
		{".rodata", "Rodata"},
		{"_foo_bar_", "FooBar"},
		{"ipv6_test", "Ipv6Test"},
		{"FOO_BAR", "FOO_BAR"},
		{"FOO_", "FOO_"},
		{"FOO__BAR", "FOO__BAR"},
		{"FOO___BAR", "FOO___BAR"},
		{"_FOO__BAR", "FOO__BAR"},
		{"__FOO__BAR", "FOO__BAR"},
	} {
		if have := Identifier(tc.in); have != tc.out {
			t.Errorf("Expected %q as output of %q, got %q", tc.out, tc.in, have)
		}
	}
}
// TestGoTypeName checks that GoTypeName strips pointers and package names.
func TestGoTypeName(t *testing.T) {
	type foo struct{}
	type bar[T any] struct{}

	for _, tc := range []struct {
		value any
		want  string
	}{
		{foo{}, "foo"},
		{new(foo), "foo"},
		{new(*foo), "foo"},
		{bar[int]{}, "bar[int]"},
		// Broken in the stdlib, see GoTypeName for details.
		// {bar[qt.C]{}, "bar[quicktest.C]"},
	} {
		qt.Assert(t, GoTypeName(tc.value), qt.Equals, tc.want)
	}
}
@@ -0,0 +1,65 @@
package internal
import (
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// Pin makes fd available at newPath, which must live on a BPF filesystem.
//
// If currentPath is empty the object is pinned at newPath. Otherwise the
// existing pin is renamed from currentPath to newPath without replacing an
// existing file; if currentPath no longer exists, fd is pinned afresh at
// newPath. Pinning to the path the object is already pinned at is a no-op.
func Pin(currentPath, newPath string, fd *sys.FD) error {
	switch newPath {
	case "":
		return errors.New("given pinning path cannot be empty")
	case currentPath:
		return nil
	}

	fsType, err := FSType(filepath.Dir(newPath))
	if err != nil {
		return err
	}
	if fsType != unix.BPF_FS_MAGIC {
		return fmt.Errorf("%s is not on a bpf filesystem", newPath)
	}

	defer runtime.KeepAlive(fd)

	pinAtNewPath := func() error {
		return sys.ObjPin(&sys.ObjPinAttr{
			Pathname: sys.NewStringPointer(newPath),
			BpfFd:    fd.Uint(),
		})
	}

	if currentPath == "" {
		return pinAtNewPath()
	}

	// Renameat2 is used instead of os.Rename to disallow the new path replacing
	// an existing path.
	err = unix.Renameat2(unix.AT_FDCWD, currentPath, unix.AT_FDCWD, newPath, unix.RENAME_NOREPLACE)
	switch {
	case err == nil:
		// Object is now moved to the new pinning path.
		return nil
	case os.IsNotExist(err):
		// Internal state not in sync with the file system so let's fix it.
		return pinAtNewPath()
	default:
		return fmt.Errorf("unable to move pinned object to new path %v: %w", newPath, err)
	}
}
// Unpin removes the file at pinnedPath.
//
// An empty path and a path that does not exist are both treated as success.
func Unpin(pinnedPath string) error {
	if pinnedPath == "" {
		return nil
	}

	if err := os.Remove(pinnedPath); err != nil && !os.IsNotExist(err) {
		return err
	}
	return nil
}
@@ -0,0 +1,43 @@
package internal
import (
"runtime"
)
// PlatformPrefix returns the platform-dependent syscall wrapper prefix used by
// the linux kernel, keyed on runtime.GOARCH. Architectures without a known
// prefix yield the empty string.
//
// Based on https://github.com/golang/go/blob/master/src/go/build/syslist.go
// and https://github.com/libbpf/libbpf/blob/master/src/libbpf.c#L10047
func PlatformPrefix() string {
	prefixes := map[string]string{
		"386":         "__ia32_",
		"amd64":       "__x64_",
		"amd64p32":    "__x64_",
		"arm":         "__arm_",
		"armbe":       "__arm_",
		"arm64":       "__arm64_",
		"arm64be":     "__arm64_",
		"mips":        "__mips_",
		"mipsle":      "__mips_",
		"mips64":      "__mips_",
		"mips64le":    "__mips_",
		"mips64p32":   "__mips_",
		"mips64p32le": "__mips_",
		"s390":        "__s390_",
		"s390x":       "__s390x_",
		"riscv":       "__riscv_",
		"riscv64":     "__riscv_",
		"ppc":         "__powerpc_",
		"ppc64":       "__powerpc64_",
		"ppc64le":     "__powerpc64_",
	}

	// A missing key yields "", matching the unknown-architecture case.
	return prefixes[runtime.GOARCH]
}
@@ -0,0 +1,11 @@
package internal
// EmptyBPFContext is the smallest-possible BPF input context to be used for
// invoking `Program.{Run,Benchmark,Test}`.
//
// Programs require a context input buffer of at least 15 bytes. Looking in
// net/bpf/test_run.c, bpf_test_init() requires that the input is at least
// ETH_HLEN (14) bytes. As of Linux commit fd18942 ("bpf: Don't redirect packets
// with invalid pkt_len"), it also requires the skb to be non-empty after
// removing the Layer 2 header.
//
// The buffer is therefore 14 header bytes plus one byte of payload, all zero.
var EmptyBPFContext = make([]byte, 15)
@@ -0,0 +1,23 @@
package internal
import (
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
// FSType returns the filesystem magic number reported by statfs(2) for path.
func FSType(path string) (int64, error) {
	var st unix.Statfs_t
	err := unix.Statfs(path, &st)
	if err != nil {
		return 0, err
	}

	if unsafe.Sizeof(st.Type) != 4 {
		return int64(st.Type), nil
	}

	// We're on a 32 bit arch, where statfs.Type is int32. bpfFSType is a
	// negative number when interpreted as int32 so we need to cast via
	// uint32 to avoid sign extension.
	return int64(uint32(st.Type)), nil
}
@@ -0,0 +1,23 @@
package internal
import (
"testing"
"github.com/cilium/ebpf/internal/unix"
qt "github.com/frankban/quicktest"
)
// TestFSType checks the magic numbers reported for tracefs and bpffs mounts.
func TestFSType(t *testing.T) {
	mounts := []struct {
		path  string
		magic int64
	}{
		{"/sys/kernel/tracing", unix.TRACEFS_MAGIC},
		{"/sys/fs/bpf", unix.BPF_FS_MAGIC},
	}

	for _, m := range mounts {
		have, err := FSType(m.path)
		qt.Assert(t, err, qt.IsNil)
		qt.Assert(t, have, qt.Equals, m.magic)
	}
}
@@ -0,0 +1,6 @@
// Package sys contains bindings for the BPF syscall.
package sys
// Regenerate types.go by invoking go generate in the current directory.
//go:generate go run github.com/cilium/ebpf/internal/cmd/gentypes ../../btf/testdata/vmlinux.btf.gz
@@ -0,0 +1,133 @@
package sys
import (
"fmt"
"math"
"os"
"runtime"
"strconv"
"github.com/cilium/ebpf/internal/unix"
)
// ErrClosedFd is returned by operations on an FD that is no longer valid.
var ErrClosedFd = unix.EBADF

// FD wraps a raw file descriptor number.
type FD struct {
	// raw is the descriptor number. It is set to -1 once the FD has been
	// closed or its descriptor handed to another owner.
	raw int
}
// newFD wraps value in an FD and installs a finalizer on it.
//
// While leak tracing is enabled, the caller's stack is recorded for value.
// It panics if a stack is already recorded for the same descriptor number.
func newFD(value int) *FD {
	if onLeakFD != nil {
		// Attempt to store the caller's stack for the given fd value.
		// Panic if fds contains an existing stack for the fd.
		if prev, loaded := fds.LoadOrStore(value, callersFrames()); loaded {
			panic(fmt.Sprintf("found existing stack for fd %d:\n%s", value, FormatFrames(prev.(*runtime.Frames))))
		}
	}

	wrapped := &FD{raw: value}
	runtime.SetFinalizer(wrapped, (*FD).finalize)
	return wrapped
}
// finalize is installed as the FD's runtime finalizer. It reports the FD to
// the leak callback, if one is set and a stack was recorded, and then closes
// the descriptor. An FD that is already closed is ignored.
func (fd *FD) finalize() {
	if fd.raw < 0 {
		return
	}

	// Invoke the fd leak callback. Calls LoadAndDelete to guarantee the callback
	// is invoked at most once for one sys.FD allocation, runtime.Frames can only
	// be unwound once.
	if frames, found := fds.LoadAndDelete(fd.Int()); found && onLeakFD != nil {
		onLeakFD(frames.(*runtime.Frames))
	}

	_ = fd.Close()
}
// NewFD wraps a raw fd with a finalizer.
//
// You must not use the raw fd after calling this function, since the underlying
// file descriptor number may change. This is because the BPF UAPI assumes that
// zero is not a valid fd value.
//
// Negative values are rejected with an error.
func NewFD(value int) (*FD, error) {
	if value < 0 {
		return nil, fmt.Errorf("invalid fd %d", value)
	}

	fd := newFD(value)
	if value == 0 {
		// Move the descriptor off zero and close the original.
		dup, err := fd.Dup()
		_ = fd.Close()
		return dup, err
	}

	return fd, nil
}
// String returns the descriptor number in decimal.
func (fd *FD) String() string {
	return strconv.Itoa(fd.raw)
}
// Int returns the raw descriptor number, which is negative once the FD has
// been closed.
func (fd *FD) Int() int {
	return fd.raw
}
// Uint returns the descriptor number as a uint32.
//
// Values that do not fit, including the negative value of a closed FD, are
// reported as math.MaxUint32.
func (fd *FD) Uint() uint32 {
	if fd.raw >= 0 && int64(fd.raw) <= math.MaxUint32 {
		return uint32(fd.raw)
	}

	// Best effort: this is the number most likely to be an invalid file
	// descriptor. It is equal to -1 (on two's complement arches).
	return math.MaxUint32
}
// Close closes the underlying descriptor. Closing an already closed FD is a
// no-op that returns nil.
func (fd *FD) Close() error {
	if fd.raw >= 0 {
		return unix.Close(fd.disown())
	}
	return nil
}
// disown gives up ownership of the descriptor: it drops the recorded leak
// trace, marks the FD as closed, clears the finalizer and returns the raw
// descriptor number to the caller.
func (fd *FD) disown() int {
	raw := fd.raw
	fds.Delete(raw)
	fd.raw = -1
	runtime.SetFinalizer(fd, nil)
	return raw
}
// Dup duplicates the descriptor with close-on-exec set and wraps the copy in
// a new FD.
//
// Returns ErrClosedFd if fd has already been closed. A failure of the
// underlying fcntl call is wrapped, so the errno can be matched with
// errors.Is.
func (fd *FD) Dup() (*FD, error) {
	if fd.raw < 0 {
		return nil, ErrClosedFd
	}

	// Always require the fd to be larger than zero: the BPF API treats the value
	// as "no argument provided".
	dup, err := unix.FcntlInt(uintptr(fd.raw), unix.F_DUPFD_CLOEXEC, 1)
	if err != nil {
		return nil, fmt.Errorf("can't dup fd: %w", err)
	}

	return newFD(dup), nil
}
// File takes ownership of FD and turns it into an [*os.File].
//
// You must not use the FD after the call returns.
//
// Returns nil if the FD is not valid.
func (fd *FD) File(name string) *os.File {
	if fd.raw >= 0 {
		return os.NewFile(uintptr(fd.disown()), name)
	}
	return nil
}
@@ -0,0 +1,66 @@
package sys
import (
"os"
"syscall"
"testing"
"github.com/cilium/ebpf/internal/unix"
qt "github.com/frankban/quicktest"
)
// init moves stdin to a descriptor above zero and then occupies fd 0 with
// /dev/null, so that TestFD can hand fd 0 to NewFD.
func init() {
	// Free up fd 0 for TestFD.
	stdin, err := unix.FcntlInt(os.Stdin.Fd(), unix.F_DUPFD_CLOEXEC, 1)
	if err != nil {
		panic(err)
	}

	// Swap os.Stdin for the duplicate before closing the original file.
	old := os.Stdin
	os.Stdin = os.NewFile(uintptr(stdin), "stdin")
	old.Close()

	reserveFdZero()
}
// reserveFdZero opens /dev/null and expects the kernel to hand out fd 0 for
// it. It panics if the open fails or if a different descriptor is returned.
func reserveFdZero() {
	fd, err := unix.Open(os.DevNull, syscall.O_RDONLY, 0)
	if err != nil {
		panic(err)
	}
	if fd != 0 {
		// err is nil on this path, so panic with an explicit message instead
		// of a nil value.
		panic("reserveFdZero: opening " + os.DevNull + " did not return fd 0")
	}
}
// TestFD checks that NewFD rejects negative values and moves fd 0 to a
// non-zero descriptor, closing the original.
func TestFD(t *testing.T) {
	_, err := NewFD(-1)
	qt.Assert(t, err, qt.IsNotNil, qt.Commentf("negative fd should be rejected"))

	// fd 0 is expected to be open here, see reserveFdZero.
	fd, err := NewFD(0)
	qt.Assert(t, err, qt.IsNil)
	qt.Assert(t, fd.Int(), qt.Not(qt.Equals), 0, qt.Commentf("fd value should not be zero"))

	var stat unix.Stat_t
	err = unix.Fstat(0, &stat)
	qt.Assert(t, err, qt.ErrorIs, unix.EBADF, qt.Commentf("zero fd should be closed"))

	// Occupy fd 0 again.
	reserveFdZero()
}
// TestFDFile checks that File hands over the descriptor and leaves the FD
// unusable afterwards.
func TestFDFile(t *testing.T) {
	fd := newFD(openFd(t))

	f := fd.File("test")
	qt.Assert(t, f, qt.IsNotNil)
	qt.Assert(t, f.Close(), qt.IsNil)

	// The FD gave up its descriptor, so further use must fail.
	qt.Assert(t, fd.File("closed"), qt.IsNil)

	_, dupErr := fd.Dup()
	qt.Assert(t, dupErr, qt.ErrorIs, ErrClosedFd)
}
// openFd opens /dev/null read-only and returns the raw descriptor, failing
// the test if the open does not succeed.
func openFd(tb testing.TB) int {
	raw, openErr := unix.Open(os.DevNull, syscall.O_RDONLY, 0)
	qt.Assert(tb, openErr, qt.IsNil)
	return raw
}
@@ -0,0 +1,93 @@
package sys
import (
"bytes"
"fmt"
"runtime"
"sync"
)
// OnLeakFD controls tracing [FD] lifetime to detect resources that are not
// closed by Close().
//
// If fn is not nil, tracing is enabled for all FDs created going forward. fn is
// invoked for all FDs that are closed by the garbage collector instead of an
// explicit Close() by a caller. Calling OnLeakFD twice with a non-nil fn
// (without disabling tracing in the meantime) will cause a panic.
//
// If fn is nil, tracing will be disabled. The previously installed callback is
// invoked for every FD that still has a recorded stack before it is removed.
//
// fn will be invoked at most once for every unique sys.FD allocation since a
// runtime.Frames can only be unwound once.
func OnLeakFD(fn func(*runtime.Frames)) {
	switch {
	case fn != nil && onLeakFD != nil:
		panic("OnLeakFD called twice with non-nil fn")

	case fn != nil:
		// Enable leak tracing with the new callback.
		onLeakFD = fn

	case onLeakFD != nil:
		// fn is nil: report all fds that are still open, then disable tracing.
		for _, frames := range flushFrames() {
			onLeakFD(frames)
		}
		onLeakFD = nil
	}
}
// onLeakFD is the callback installed through OnLeakFD. Leak tracing is
// enabled while it is non-nil.
var onLeakFD func(*runtime.Frames)

// fds is a registry of all file descriptors wrapped into sys.FD that were
// created while an fd tracer was active.
var fds sync.Map // map[int]*runtime.Frames
// flushFrames empties the fds registry and returns the stacks it held. Every
// runtime.Frames can only be unwound once using Next(), so entries are
// removed as they are collected.
func flushFrames() []*runtime.Frames {
	var collected []*runtime.Frames

	drain := func(fd, stack any) bool {
		collected = append(collected, stack.(*runtime.Frames))
		fds.Delete(fd)
		return true
	}
	fds.Range(drain)

	return collected
}
// callersFrames captures up to 32 frames of the calling goroutine's stack,
// starting at the caller of this function.
//
// Returns nil if no program counters could be collected.
func callersFrames() *runtime.Frames {
	c := make([]uintptr, 32)

	// Skip runtime.Callers and this function.
	i := runtime.Callers(2, c)
	if i == 0 {
		return nil
	}

	// Only the first i entries were filled in; don't hand the zeroed tail of
	// the buffer to CallersFrames.
	return runtime.CallersFrames(c[:i])
}
// FormatFrames formats a runtime.Frames as a human-readable string.
//
// Each frame produces two lines: the function name with the offset of the PC
// from the function entry, followed by the file and line number. fs is
// consumed in the process.
func FormatFrames(fs *runtime.Frames) string {
	var out bytes.Buffer
	for more := true; more; {
		var frame runtime.Frame
		frame, more = fs.Next()
		fmt.Fprintf(&out, "\t%s+%#x\n\t\t%s:%d\n", frame.Function, frame.PC-frame.Entry, frame.File, frame.Line)
	}
	return out.String()
}
@@ -0,0 +1,49 @@
// Code generated by "stringer -type MapFlags"; DO NOT EDIT.
package sys
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[BPF_F_NO_PREALLOC-1]
_ = x[BPF_F_NO_COMMON_LRU-2]
_ = x[BPF_F_NUMA_NODE-4]
_ = x[BPF_F_RDONLY-8]
_ = x[BPF_F_WRONLY-16]
_ = x[BPF_F_STACK_BUILD_ID-32]
_ = x[BPF_F_ZERO_SEED-64]
_ = x[BPF_F_RDONLY_PROG-128]
_ = x[BPF_F_WRONLY_PROG-256]
_ = x[BPF_F_CLONE-512]
_ = x[BPF_F_MMAPABLE-1024]
_ = x[BPF_F_PRESERVE_ELEMS-2048]
_ = x[BPF_F_INNER_MAP-4096]
}
const _MapFlags_name = "BPF_F_NO_PREALLOCBPF_F_NO_COMMON_LRUBPF_F_NUMA_NODEBPF_F_RDONLYBPF_F_WRONLYBPF_F_STACK_BUILD_IDBPF_F_ZERO_SEEDBPF_F_RDONLY_PROGBPF_F_WRONLY_PROGBPF_F_CLONEBPF_F_MMAPABLEBPF_F_PRESERVE_ELEMSBPF_F_INNER_MAP"
var _MapFlags_map = map[MapFlags]string{
1: _MapFlags_name[0:17],
2: _MapFlags_name[17:36],
4: _MapFlags_name[36:51],
8: _MapFlags_name[51:63],
16: _MapFlags_name[63:75],
32: _MapFlags_name[75:95],
64: _MapFlags_name[95:110],
128: _MapFlags_name[110:127],
256: _MapFlags_name[127:144],
512: _MapFlags_name[144:155],
1024: _MapFlags_name[155:169],
2048: _MapFlags_name[169:189],
4096: _MapFlags_name[189:204],
}
func (i MapFlags) String() string {
if str, ok := _MapFlags_map[i]; ok {
return str
}
return "MapFlags(" + strconv.FormatInt(int64(i), 10) + ")"
}
@@ -0,0 +1,52 @@
package sys
import (
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
// NewPointer creates a 64-bit pointer from an unsafe Pointer.
//
// The returned Pointer stores ptr as is, without copying what it points to.
func NewPointer(ptr unsafe.Pointer) Pointer {
	return Pointer{ptr: ptr}
}
// NewSlicePointer creates a 64-bit pointer to the first element of a byte
// slice. An empty or nil slice yields the zero Pointer.
func NewSlicePointer(buf []byte) Pointer {
	if len(buf) > 0 {
		return Pointer{ptr: unsafe.Pointer(&buf[0])}
	}
	return Pointer{}
}
// NewSlicePointerLen creates a 64-bit pointer from a byte slice and also
// returns the length of the slice as a uint32.
//
// Useful to assign both the pointer and the length in one go.
func NewSlicePointerLen(buf []byte) (Pointer, uint32) {
	ptr := NewSlicePointer(buf)
	size := uint32(len(buf))
	return ptr, size
}
// NewStringPointer creates a 64-bit pointer from a string.
//
// The string is converted with unix.BytePtrFromString. If that conversion
// fails the zero Pointer is returned.
func NewStringPointer(str string) Pointer {
	if p, err := unix.BytePtrFromString(str); err == nil {
		return Pointer{ptr: unsafe.Pointer(p)}
	}
	return Pointer{}
}
// NewStringSlicePointer allocates an array of Pointers to each string in the
// given slice of strings and returns a 64-bit pointer to the start of the
// resulting array.
//
// An empty or nil slice yields the zero Pointer, mirroring NewSlicePointer.
//
// Use this function to pass arrays of strings as syscall arguments.
func NewStringSlicePointer(strings []string) Pointer {
	// Taking the address of the first element of an empty slice would panic.
	if len(strings) == 0 {
		return Pointer{}
	}

	sp := make([]Pointer, 0, len(strings))
	for _, s := range strings {
		sp = append(sp, NewStringPointer(s))
	}

	return Pointer{ptr: unsafe.Pointer(&sp[0])}
}
@@ -0,0 +1,14 @@
//go:build armbe || mips || mips64p32
package sys
import (
"unsafe"
)
// Pointer wraps an unsafe.Pointer to be 64bit to
// conform to the syscall specification.
//
// In this variant the 32 bits of padding precede the pointer.
// NOTE(review): presumably selected for 32-bit big-endian targets, going by
// the build constraint of this file; confirm.
type Pointer struct {
	pad uint32
	ptr unsafe.Pointer
}
@@ -0,0 +1,14 @@
//go:build 386 || amd64p32 || arm || mipsle || mips64p32le
package sys
import (
"unsafe"
)
// Pointer wraps an unsafe.Pointer to be 64bit to
// conform to the syscall specification.
//
// In this variant the 32 bits of padding follow the pointer.
// NOTE(review): presumably selected for 32-bit little-endian targets, going
// by the build constraint of this file; confirm.
type Pointer struct {
	ptr unsafe.Pointer
	pad uint32
}
@@ -0,0 +1,13 @@
//go:build !386 && !amd64p32 && !arm && !mipsle && !mips64p32le && !armbe && !mips && !mips64p32
package sys
import (
"unsafe"
)
// Pointer wraps an unsafe.Pointer to be 64bit to
// conform to the syscall specification.
//
// This variant carries no padding; it is built for every architecture not
// covered by the padded variants.
type Pointer struct {
	ptr unsafe.Pointer
}
@@ -0,0 +1,83 @@
package sys
import (
"fmt"
"runtime"
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
// A sigset containing only SIGPROF.
var profSet unix.Sigset_t

// init sets the SIGPROF bit in profSet.
func init() {
	// See sigsetAdd for details on the implementation. Open coded here so
	// that the compiler will check the constant calculations for us.
	profSet.Val[sigprofBit/wordBits] |= 1 << (sigprofBit % wordBits)
}
// maskProfilerSignal locks the calling goroutine to its underlying OS thread
// and adds SIGPROF to the thread's signal mask. This prevents pprof from
// interrupting expensive syscalls like e.g. BPF_PROG_LOAD.
//
// If the mask cannot be changed the thread is unlocked again and the function
// panics.
//
// The caller must defer unmaskProfilerSignal() to reverse the operation.
func maskProfilerSignal() {
	runtime.LockOSThread()

	err := unix.PthreadSigmask(unix.SIG_BLOCK, &profSet, nil)
	if err == nil {
		return
	}

	runtime.UnlockOSThread()
	panic(fmt.Errorf("masking profiler signal: %w", err))
}
// unmaskProfilerSignal removes SIGPROF from the underlying thread's signal
// mask, allowing it to be interrupted for profiling once again.
//
// It also unlocks the current goroutine from its underlying OS thread, even
// when changing the mask fails and the function panics.
func unmaskProfilerSignal() {
	defer runtime.UnlockOSThread()

	err := unix.PthreadSigmask(unix.SIG_UNBLOCK, &profSet, nil)
	if err != nil {
		panic(fmt.Errorf("unmasking profiler signal: %w", err))
	}
}
// Compile-time constants used to place SIGPROF in a unix.Sigset_t.
const (
	// Signal is the nth bit in the bitfield.
	// Signals are numbered from 1, so SIGPROF occupies bit SIGPROF-1.
	sigprofBit = int(unix.SIGPROF - 1)
	// The number of bits in one Sigset_t word.
	wordBits = int(unsafe.Sizeof(unix.Sigset_t{}.Val[0])) * 8
)
// sigsetAdd adds signal to set.
//
// It returns an error if signal is smaller than 1 or does not fit into set.
//
// Note: Sigset_t.Val's value type is uint32 or uint64 depending on the arch.
// This function must be able to deal with both and so must avoid any direct
// references to u32 or u64 types.
func sigsetAdd(set *unix.Sigset_t, signal unix.Signal) error {
	if signal < 1 {
		return fmt.Errorf("signal %d must be larger than 0", signal)
	}

	// For amd64, runtime.sigaddset() performs the following operation:
	// set[(signal-1)/32] |= 1 << ((uint32(signal) - 1) & 31)
	//
	// This trick depends on sigset being two u32's, causing a signal in the
	// bottom 31 bits to be written to the low word if bit 32 is low, or the high
	// word if bit 32 is high.

	// Signals are numbered from 1, bits from 0.
	idx := int(signal - 1)

	// Split the bit index into the word it lives in and the offset inside it.
	word, offset := idx/wordBits, idx%wordBits
	if word >= len(set.Val) {
		return fmt.Errorf("signal %d does not fit within unix.Sigset_t", signal)
	}

	set.Val[word] |= 1 << offset

	return nil
}
@@ -0,0 +1,78 @@
package sys
import (
"runtime"
"testing"
"unsafe"
"github.com/cilium/ebpf/internal/unix"
qt "github.com/frankban/quicktest"
)
// TestSigset exercises sigsetAdd with the lowest and highest signal numbers
// that fit a unix.Sigset_t, and with values outside that range.
func TestSigset(t *testing.T) {
	const maxSignal = unix.Signal(unsafe.Sizeof(unix.Sigset_t{}) * 8)

	var want, got unix.Sigset_t

	// Type-infer a sigset word. This is a typed uint of 32 or 64 bits depending
	// on the target architecture, so we can't use an untyped uint.
	zero := unix.Sigset_t{}.Val[0]
	last := len(unix.Sigset_t{}.Val) - 1

	// Flip the first bit of the first word.
	err := sigsetAdd(&got, 1)
	if err != nil {
		t.Fatal(err)
	}
	want.Val[0] = 1
	if want != got {
		t.Fatalf("expected first word to be 0x%x, got: 0x%x", want, got)
	}

	// And the last bit of the last word.
	err = sigsetAdd(&got, maxSignal)
	if err != nil {
		t.Fatal(err)
	}
	want.Val[last] = ^(^zero >> 1)
	if want != got {
		t.Fatalf("expected last word to be 0x%x, got: 0x%x", want, got)
	}

	if sigsetAdd(&got, maxSignal+1) == nil {
		t.Fatal("expected out-of-bounds add to be rejected")
	}

	if sigsetAdd(&got, -1) == nil {
		t.Fatal("expected negative signal to be rejected")
	}
}
// TestProfilerSignal checks that maskProfilerSignal adds SIGPROF to the
// thread's signal mask and that unmaskProfilerSignal restores the mask that
// was in place before.
func TestProfilerSignal(t *testing.T) {
	// Additional goroutine lock to make the PthreadSigmask below execute on the
	// same OS thread as the functions under test. UnlockOSThread needs to be
	// called as many times as LockOSThread to unlock the goroutine.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	// Passing a nil set only reads the current mask.
	var old unix.Sigset_t
	if err := unix.PthreadSigmask(0, nil, &old); err != nil {
		t.Fatal("get sigmask:", err)
	}

	maskProfilerSignal()

	var have unix.Sigset_t
	if err := unix.PthreadSigmask(0, nil, &have); err != nil {
		t.Fatal("get sigmask:", err)
	}

	// Adding SIGPROF to a copy of the current mask must not change it.
	want := have
	qt.Assert(t, sigsetAdd(&want, unix.SIGPROF), qt.IsNil)
	qt.Assert(t, have, qt.Equals, want)

	unmaskProfilerSignal()

	if err := unix.PthreadSigmask(0, nil, &have); err != nil {
		t.Fatal("get sigmask:", err)
	}

	qt.Assert(t, have, qt.Equals, old)
}
@@ -0,0 +1,178 @@
package sys
import (
"runtime"
"syscall"
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
// ENOTSUPP is a Linux internal error code that has leaked into UAPI.
//
// It is not the same as ENOTSUP or EOPNOTSUPP.
var ENOTSUPP = syscall.Errno(524)
// BPF wraps SYS_BPF.
//
// Any pointers contained in attr must use the Pointer type from this package.
//
// The first return value is the raw result of the syscall. A non-zero errno
// is returned as a wrapped error that must be matched with errors.Is.
func BPF(cmd Cmd, attr unsafe.Pointer, size uintptr) (uintptr, error) {
	// Prevent the Go profiler from repeatedly interrupting the verifier,
	// which could otherwise lead to a livelock due to receiving EAGAIN.
	switch cmd {
	case BPF_PROG_LOAD, BPF_PROG_RUN:
		maskProfilerSignal()
		defer unmaskProfilerSignal()
	}

	for {
		ret, _, errno := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size)
		runtime.KeepAlive(attr)

		// As of ~4.20 the verifier can be interrupted by a signal,
		// and returns EAGAIN in that case.
		if cmd == BPF_PROG_LOAD && errno == unix.EAGAIN {
			continue
		}

		if errno == 0 {
			return ret, nil
		}
		return ret, wrappedErrno{errno}
	}
}
// Info is implemented by all structs that can be passed to the ObjInfo syscall.
//
//	MapInfo
//	ProgInfo
//	LinkInfo
//	BtfInfo
type Info interface {
	// info returns a pointer to the struct and its size in bytes.
	info() (unsafe.Pointer, uint32)
}

var _ Info = (*MapInfo)(nil)

// info returns a pointer to i and the size of MapInfo in bytes.
func (i *MapInfo) info() (unsafe.Pointer, uint32) {
	return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i))
}

var _ Info = (*ProgInfo)(nil)

// info returns a pointer to i and the size of ProgInfo in bytes.
func (i *ProgInfo) info() (unsafe.Pointer, uint32) {
	return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i))
}

var _ Info = (*LinkInfo)(nil)

// info returns a pointer to i and the size of LinkInfo in bytes.
func (i *LinkInfo) info() (unsafe.Pointer, uint32) {
	return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i))
}

var _ Info = (*BtfInfo)(nil)

// info returns a pointer to i and the size of BtfInfo in bytes.
func (i *BtfInfo) info() (unsafe.Pointer, uint32) {
	return unsafe.Pointer(i), uint32(unsafe.Sizeof(*i))
}
// ObjInfo retrieves information about a BPF Fd.
//
// info may be one of MapInfo, ProgInfo, LinkInfo and BtfInfo. It is handed to
// ObjGetInfoByFd by pointer together with its size.
func ObjInfo(fd *FD, info Info) error {
	ptr, size := info.info()

	attr := ObjGetInfoByFdAttr{
		BpfFd:   fd.Uint(),
		InfoLen: size,
		Info:    NewPointer(ptr),
	}
	err := ObjGetInfoByFd(&attr)

	// Keep fd reachable until the syscall has returned.
	runtime.KeepAlive(fd)
	return err
}
// ObjName is a null-terminated string made up of
// 'A-Za-z0-9_' characters.
type ObjName [unix.BPF_OBJ_NAME_LEN]byte
// NewObjName copies name into an ObjName, truncating it if it is too long.
//
// The final byte is never written, so the result is always null terminated.
func NewObjName(name string) ObjName {
	var out ObjName
	usable := out[:len(out)-1]
	copy(usable, name)
	return out
}
// LogLevel controls the verbosity of the kernel's eBPF program verifier.
type LogLevel uint32

// The constants are consecutive bits starting at 1, so they can be combined
// with a bitwise or.
const (
	BPF_LOG_LEVEL1 LogLevel = 1 << iota // 1
	BPF_LOG_LEVEL2                      // 2
	BPF_LOG_STATS                       // 4
)
// LinkID uniquely identifies a bpf_link.
type LinkID uint32
// BTFID uniquely identifies a BTF blob loaded into the kernel.
type BTFID uint32
// TypeID identifies a type in a BTF blob.
type TypeID uint32
// MapFlags control map behaviour.
type MapFlags uint32

//go:generate stringer -type MapFlags

// The flags are consecutive bits starting at 1. The generated String method
// depends on these exact values, so re-run stringer after changing them.
const (
	BPF_F_NO_PREALLOC MapFlags = 1 << iota
	BPF_F_NO_COMMON_LRU
	BPF_F_NUMA_NODE
	BPF_F_RDONLY
	BPF_F_WRONLY
	BPF_F_STACK_BUILD_ID
	BPF_F_ZERO_SEED
	BPF_F_RDONLY_PROG
	BPF_F_WRONLY_PROG
	BPF_F_CLONE
	BPF_F_MMAPABLE
	BPF_F_PRESERVE_ELEMS
	BPF_F_INNER_MAP
)
// wrappedErrno wraps syscall.Errno to prevent direct comparisons with
// syscall.E* or unix.E* constants.
//
// You should never export an error of this type.
type wrappedErrno struct {
	syscall.Errno
}

// Unwrap exposes the underlying errno to errors.Is and errors.As.
func (we wrappedErrno) Unwrap() error {
	return we.Errno
}

// Error returns the errno's message, with a fixed message for ENOTSUPP.
func (we wrappedErrno) Error() string {
	if we.Errno != ENOTSUPP {
		return we.Errno.Error()
	}
	return "operation not supported"
}
// syscallError pairs an error with the errno that caused it.
type syscallError struct {
	error
	errno syscall.Errno
}

// Error returns an error that reports the message of err, matches err via
// errors.Is and unwraps to errno.
func Error(err error, errno syscall.Errno) error {
	return &syscallError{error: err, errno: errno}
}

// Is reports whether target is the error that was passed to Error.
func (se *syscallError) Is(target error) bool {
	return se.error == target
}

// Unwrap returns the errno that was passed to Error.
func (se *syscallError) Unwrap() error {
	return se.errno
}
@@ -0,0 +1,61 @@
package sys
import (
"errors"
"testing"
"github.com/cilium/ebpf/internal/unix"
qt "github.com/frankban/quicktest"
)
// TestObjName checks that an over-long name is truncated and stays null
// terminated.
func TestObjName(t *testing.T) {
	name := NewObjName("more_than_16_characters_long")

	if last := name[len(name)-1]; last != 0 {
		t.Error("NewBPFObjName doesn't null terminate")
	}

	if got := len(name); got != unix.BPF_OBJ_NAME_LEN {
		t.Errorf("Name is %d instead of %d bytes long", got, unix.BPF_OBJ_NAME_LEN)
	}
}
// TestWrappedErrno checks that a wrappedErrno only matches its errno through
// errors.Is, never through direct comparison.
func TestWrappedErrno(t *testing.T) {
	var (
		wrapped error = wrappedErrno{unix.EINVAL}
		plain   error = unix.EINVAL
	)

	if wrapped == plain {
		t.Error("wrappedErrno is comparable to plain errno")
	}

	if !errors.Is(wrapped, plain) {
		t.Error("errors.Is(wrappedErrno, errno) returns false")
	}

	if errors.Is(wrapped, unix.EAGAIN) {
		t.Error("errors.Is(wrappedErrno, EAGAIN) returns true")
	}

	notsupp := wrappedErrno{ENOTSUPP}
	qt.Assert(t, notsupp.Error(), qt.Contains, "operation not supported")
}
// TestSyscallError checks that the result of Error matches both the wrapped
// error and the errno, and that neither of them matches it in reverse.
func TestSyscallError(t *testing.T) {
	base := errors.New("foo")
	combined := Error(base, unix.EINVAL)

	if !errors.Is(combined, unix.EINVAL) {
		t.Error("SyscallError is not the wrapped errno")
	}

	if !errors.Is(combined, base) {
		t.Error("SyscallError is not the wrapped error")
	}

	if errors.Is(unix.EINVAL, combined) {
		t.Error("Errno is the SyscallError")
	}

	if errors.Is(base, combined) {
		t.Error("Error is the SyscallError")
	}
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,21 @@
package testutils
import (
"os"
"testing"
)
// TempBPFFS creates a temporary directory below /sys/fs/bpf and returns its
// path. The test fails if the directory cannot be created.
//
// The directory is removed again through tb.Cleanup.
func TempBPFFS(tb testing.TB) string {
	tb.Helper()

	dir, mkErr := os.MkdirTemp("/sys/fs/bpf", "ebpf-test")
	if mkErr != nil {
		tb.Fatal("Create temporary directory on BPFFS:", mkErr)
	}

	tb.Cleanup(func() {
		os.RemoveAll(dir)
	})

	return dir
}
@@ -0,0 +1,65 @@
package testutils
import (
"errors"
"os"
"strings"
"testing"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/unix"
)
// cgroup2Path returns the mount point of the first cgroup2 filesystem listed
// in /proc/mounts. The lookup is performed once and its result is reused.
var cgroup2Path = internal.Memoize(func() (string, error) {
	mounts, err := os.ReadFile("/proc/mounts")
	if err != nil {
		return "", err
	}

	for _, line := range strings.Split(string(mounts), "\n") {
		// Fields are: device, mount point, filesystem type, options, ...
		// The device name of a cgroup2 mount is arbitrary, so match on the
		// filesystem type. Short or empty lines are skipped rather than
		// indexed out of range.
		mount := strings.Fields(line)
		if len(mount) >= 3 && mount[2] == "cgroup2" {
			return mount[1], nil
		}
	}

	return "", errors.New("cgroup2 not mounted")
})
// CreateCgroup creates a temporary cgroup below the cgroup2 mount and returns
// it as an open directory handle.
//
// The handle is closed and the cgroup removed through tb.Cleanup. The test
// fails if the mount cannot be located or the cgroup cannot be created.
func CreateCgroup(tb testing.TB) *os.File {
	tb.Helper()

	root, err := cgroup2Path()
	if err != nil {
		tb.Fatal("Can't locate cgroup2 mount:", err)
	}

	dir, err := os.MkdirTemp(root, "ebpf-link")
	if err != nil {
		tb.Fatal("Can't create cgroupv2:", err)
	}

	handle, err := os.Open(dir)
	if err != nil {
		// Don't leave the freshly created cgroup behind.
		os.Remove(dir)
		tb.Fatal(err)
	}

	tb.Cleanup(func() {
		handle.Close()
		os.Remove(dir)
	})

	return handle
}
// GetCgroupIno returns the inode number fstat(2) reports for cgroup, failing
// the test if the call does not succeed.
func GetCgroupIno(t *testing.T, cgroup *os.File) uint64 {
	var st unix.Stat_t
	if err := unix.Fstat(int(cgroup.Fd()), &st); err != nil {
		t.Fatal(err)
	}
	return st.Ino
}
@@ -0,0 +1,34 @@
package fdtrace
import (
"fmt"
"os"
"runtime"
"testing"
"github.com/cilium/ebpf/internal/sys"
)
// TestMain runs m with sys.FD leak tracing enabled.
//
// Every leaked fd is printed to stderr together with the stack that created
// it. If at least one leak was reported the process exits with code 99
// instead of the result of m.Run.
func TestMain(m *testing.M) {
	// fn can either be invoked asynchronously by the gc or during disabling of
	// the leak tracer below. Don't terminate the program immediately, instead
	// capture a boolean that will be used to set the exit code. This avoids races
	// and gives all events the chance to be written to stderr.
	var leak bool
	report := func(fs *runtime.Frames) {
		fmt.Fprintln(os.Stderr, "leaked fd created at:")
		fmt.Fprintln(os.Stderr, sys.FormatFrames(fs))
		leak = true
	}
	sys.OnLeakFD(report)

	code := m.Run()

	// Disabling the tracer reports every fd that is still open.
	sys.OnLeakFD(nil)

	if leak {
		code = 99
	}
	os.Exit(code)
}
@@ -0,0 +1,139 @@
package testutils
import (
"errors"
"os"
"strings"
"testing"
"github.com/cilium/ebpf/internal"
)
const (
	// ignoreKernelVersionEnvVar names the environment variable holding a
	// comma separated list of test names for which the kernel version check
	// is skipped, see ignoreKernelVersionCheck.
	ignoreKernelVersionEnvVar = "EBPF_TEST_IGNORE_KERNEL_VERSION"
)
// CheckFeatureTest runs the feature test fn and evaluates its result with
// checkFeatureTestError.
func CheckFeatureTest(t *testing.T, fn func() error) {
	checkFeatureTestError(t, fn())
}
// checkFeatureTestError evaluates the result of a feature test.
//
// A nil error passes. An UnsupportedFeatureError either skips the test, when
// the test is listed in EBPF_TEST_IGNORE_KERNEL_VERSION, or is checked
// against the running kernel version. Any other error marks the test as
// failed.
func checkFeatureTestError(t *testing.T, err error) {
	if err == nil {
		return
	}

	var ufe *internal.UnsupportedFeatureError
	switch {
	case !errors.As(err, &ufe):
		t.Error("Feature test failed:", err)
	case ignoreKernelVersionCheck(t.Name()):
		t.Skipf("Ignoring error due to %s: %s", ignoreKernelVersionEnvVar, ufe.Error())
	default:
		checkKernelVersion(t, ufe)
	}
}
// CheckFeatureMatrix runs one subtest per entry of fm, named after the
// entry, and evaluates the entry's result with checkFeatureTestError.
func CheckFeatureMatrix[K comparable](t *testing.T, fm internal.FeatureMatrix[K]) {
	t.Helper()

	for key, ft := range fm {
		check := func(t *testing.T) {
			checkFeatureTestError(t, fm.Result(key))
		}
		t.Run(ft.Name, check)
	}
}
// SkipIfNotSupported skips the test if err indicates a missing feature.
//
// A bare internal.ErrNotSupported fails the test, since the error is
// expected to be wrapped. An UnsupportedFeatureError is checked against the
// running kernel version before the test is skipped.
func SkipIfNotSupported(tb testing.TB, err error) {
	tb.Helper()

	if err == internal.ErrNotSupported {
		tb.Fatal("Unwrapped ErrNotSupported")
	}

	if ufe := (*internal.UnsupportedFeatureError)(nil); errors.As(err, &ufe) {
		checkKernelVersion(tb, ufe)
		tb.Skip(ufe.Error())
	}

	if errors.Is(err, internal.ErrNotSupported) {
		tb.Skip(err.Error())
	}
}
// checkKernelVersion fails the test if ufe names a minimum kernel version
// and the running kernel is not older than that version.
func checkKernelVersion(tb testing.TB, ufe *internal.UnsupportedFeatureError) {
	if ufe.MinimumVersion.Unspecified() || isKernelLessThan(tb, ufe.MinimumVersion) {
		return
	}

	tb.Helper()
	tb.Fatalf("Feature '%s' isn't supported even though kernel is newer than %s",
		ufe.Name, ufe.MinimumVersion)
}
// SkipOnOldKernel skips the test if the running kernel is older than
// minVersion. feature is only used in the skip message.
func SkipOnOldKernel(tb testing.TB, minVersion, feature string) {
	tb.Helper()

	if !IsKernelLessThan(tb, minVersion) {
		return
	}
	tb.Skipf("Test requires at least kernel %s (due to missing %s)", minVersion, feature)
}
// IsKernelLessThan reports whether the running kernel is older than
// minVersion. The test fails if minVersion cannot be parsed.
func IsKernelLessThan(tb testing.TB, minVersion string) bool {
	tb.Helper()

	parsed, parseErr := internal.NewVersion(minVersion)
	if parseErr != nil {
		tb.Fatalf("Invalid version %s: %s", minVersion, parseErr)
	}

	return isKernelLessThan(tb, parsed)
}
// isKernelLessThan reports whether the running kernel is older than minv.
//
// If CI_MAX_KERNEL_VERSION is set, the test fails when that version is
// invalid or older than minv, since the test could then never run on CI.
func isKernelLessThan(tb testing.TB, minv internal.Version) bool {
	tb.Helper()

	ciMax := os.Getenv("CI_MAX_KERNEL_VERSION")
	if ciMax != "" {
		maxv, err := internal.NewVersion(ciMax)
		if err != nil {
			tb.Fatalf("Invalid version %q in CI_MAX_KERNEL_VERSION: %s", ciMax, err)
		}

		if maxv.Less(minv) {
			tb.Fatalf("Test for %s will never execute on CI since %s is the most recent kernel", minv, maxv)
		}
	}

	running := kernelVersion(tb)
	return running.Less(minv)
}
// kernelVersion returns the version reported by internal.KernelVersion,
// failing the test if it cannot be determined.
func kernelVersion(tb testing.TB) internal.Version {
	tb.Helper()

	version, lookupErr := internal.KernelVersion()
	if lookupErr != nil {
		tb.Fatal(lookupErr)
	}
	return version
}
// ignoreKernelVersionCheck reports whether tName is listed in the environment
// variable EBPF_TEST_IGNORE_KERNEL_VERSION.
//
// The variable holds a comma (,) separated list of test names for which the
// kernel version check should be ignored. Whitespace around a name is
// ignored. An unset or empty variable matches nothing.
//
// eg: EBPF_TEST_IGNORE_KERNEL_VERSION=TestABC,TestXYZ
func ignoreKernelVersionCheck(tName string) bool {
	list := os.Getenv(ignoreKernelVersionEnvVar)
	if list == "" {
		return false
	}

	for _, entry := range strings.Split(list, ",") {
		if tName == strings.TrimSpace(entry) {
			return true
		}
	}
	return false
}
@@ -0,0 +1,54 @@
package testutils
import (
"testing"
)
// TestIgnoreKernelVersionCheckWhenEnvVarIsSet runs ignoreKernelVersionCheck
// against several values of EBPF_TEST_IGNORE_KERNEL_VERSION.
func TestIgnoreKernelVersionCheckWhenEnvVarIsSet(t *testing.T) {
	type scenario struct {
		name     string // subtest name
		envValue string // value of the environment variable
		testName string // test name to look up
		want     bool   // expected result
	}

	scenarios := []scenario{
		{
			"should NOT ignore kernel version check if environment var set to empty string",
			"", "TestABC", false,
		},
		{
			"should ignore kernel version check if environment var set to skip test name with single value",
			"TestABC", "TestABC", true,
		},
		{
			"should match test name when multiple comma separated names list is provided",
			"TestABC,TestXYZ", "TestXYZ", true,
		},
		{
			"should NOT match test name when multiple comma separated names list is provided but name is not present in list",
			"TestABC,TestXYZ", "TestPQR", false,
		},
		{
			"should match test name if names list has leading/trailing spaces",
			"TestABC, TestXYZ , TestPQR", "TestXYZ", true,
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			t.Setenv(ignoreKernelVersionEnvVar, sc.envValue)

			got := ignoreKernelVersionCheck(sc.testName)
			if got != sc.want {
				t.Errorf("ignoreKernelVersionCheck() = %v, want %v", got, sc.want)
			}
		})
	}
}
@@ -0,0 +1,58 @@
package testutils
import (
"path/filepath"
"testing"
)
// Files runs fn as a subtest for every entry in files.
//
// Each subtest is named after the base name of its file. The test is failed
// immediately if files is empty.
func Files(t *testing.T, files []string, fn func(*testing.T, string)) {
	t.Helper()

	if len(files) < 1 {
		t.Fatalf("No files given")
	}

	for i := range files {
		path := files[i] // per-iteration copy captured by the closure below
		t.Run(filepath.Base(path), func(sub *testing.T) {
			fn(sub, path)
		})
	}
}
// Glob returns the files matching pattern, minus those matching any exclude.
//
// The pattern may include a full path. Excludes use the same syntax as
// pattern, but are only applied to the basename instead of the full path.
// A malformed pattern or exclude fails tb.
func Glob(tb testing.TB, pattern string, excludes ...string) []string {
	tb.Helper()

	matches, err := filepath.Glob(pattern)
	if err != nil {
		tb.Fatal("Can't glob files:", err)
	}

	// Without excludes the glob result is returned untouched.
	if len(excludes) == 0 {
		return matches
	}

	// isExcluded reports whether base matches at least one exclude pattern.
	isExcluded := func(base string) bool {
		for _, ex := range excludes {
			hit, err := filepath.Match(ex, base)
			if err != nil {
				tb.Fatal(err)
			}
			if hit {
				return true
			}
		}
		return false
	}

	var kept []string
	for _, m := range matches {
		if isExcluded(filepath.Base(m)) {
			continue
		}
		kept = append(kept, m)
	}
	return kept
}
@@ -0,0 +1,16 @@
package testutils
import (
"fmt"
"os"
"github.com/cilium/ebpf/rlimit"
)
// init lifts the memlock rlimit for every test binary that links this package.
//
// Failure is deliberately non-fatal: a warning, including the underlying
// error, is written to stderr so that a later test failure can be traced back
// to the rlimit.
func init() {
	// Increase the memlock for all tests unconditionally. It's a great source of
	// weird bugs, since different distros have different default limits.
	if err := rlimit.RemoveMemlock(); err != nil {
		fmt.Fprintln(os.Stderr, "WARNING: Failed to adjust rlimit, tests may fail:", err)
	}
}
@@ -0,0 +1,21 @@
package testutils
import (
"fmt"
"math/rand"
"sync"
"time"
)
// randSeed holds the seed shared by all generators handed out by Rand.
// once guards the single initialisation of value.
var randSeed struct {
	value int64
	once  sync.Once
}

// Rand returns a new random number generator.
//
// The seed is derived from the current time on the first call and printed to
// stdout; every generator returned afterwards is seeded with that same value.
func Rand() *rand.Rand {
	randSeed.once.Do(func() {
		seed := time.Now().UnixMicro()
		randSeed.value = seed
		fmt.Printf("Random seed is %d\n", seed)
	})

	src := rand.NewSource(randSeed.value)
	return rand.New(src)
}
@@ -0,0 +1,359 @@
package tracefs
import (
"crypto/rand"
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"syscall"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/unix"
)
var (
// ErrInvalidInput is wrapped by errors reporting a malformed group prefix,
// an over-long group name or a path that escapes the tracefs mount point.
ErrInvalidInput = errors.New("invalid input")
// ErrInvalidMaxActive is returned by NewEvent when RetprobeMaxActive is set
// for anything other than a kprobe return probe.
ErrInvalidMaxActive = errors.New("can only set maxactive on kretprobes")
)
// ProbeType selects the kind of tracefs probe an event belongs to. Its string
// form (see the line comments below) is the prefix of the "<type>_events" file
// used to create and remove events.
//
//go:generate stringer -type=ProbeType -linecomment
type ProbeType uint8
const (
Kprobe ProbeType = iota // kprobe
Uprobe // uprobe
)
// eventsFile opens the "<type>_events" file of pt below the tracefs mount
// point in write-only append mode. The caller is responsible for closing it.
func (pt ProbeType) eventsFile() (*os.File, error) {
	name := pt.String() + "_events"

	path, err := sanitizeTracefsPath(name)
	if err != nil {
		return nil, err
	}

	const flags = os.O_APPEND | os.O_WRONLY
	return os.OpenFile(path, flags, 0666)
}
// ProbeArgs describes the trace event that NewEvent creates.
type ProbeArgs struct {
// Type selects the events file (kprobe or uprobe) the probe is written to.
Type ProbeType
// Symbol is sanitized into the event name and is the SYM part of a kprobe
// token. Group is the tracefs group of the event. Path is the PATH part of
// a uprobe token.
Symbol, Group, Path string
// Offset is appended to the kprobe or uprobe token. RefCtrOffset is added to
// a uprobe token when non-zero.
// NOTE(review): Cookie is not read by the code visible here; confirm its consumer.
Offset, RefCtrOffset, Cookie uint64
// RetprobeMaxActive is the MAXACTIVE value emitted for kprobe return probes;
// NewEvent rejects it for every other kind of probe.
// NOTE(review): Pid is not read by the code visible here; confirm its consumer.
Pid, RetprobeMaxActive int
// Ret requests a return probe ("r" prefix) instead of an entry probe ("p").
Ret bool
}
// RandomGroup generates a pseudorandom string for use as a tracefs group name.
//
// The result has the form "<prefix>_<16 hex characters>". An error is returned
// when prefix contains characters not allowed by validIdentifier, when
// rand.Read() fails, or when the resulting name would exceed 63 characters
// (kernel limitation).
func RandomGroup(prefix string) (string, error) {
	if !validIdentifier(prefix) {
		return "", fmt.Errorf("prefix '%s' must be alphanumeric or underscore: %w", prefix, ErrInvalidInput)
	}

	// Eight random bytes are rendered as sixteen hex characters.
	var suffix [8]byte
	if _, err := rand.Read(suffix[:]); err != nil {
		return "", fmt.Errorf("reading random bytes: %w", err)
	}

	const maxGroupLen = 63
	group := fmt.Sprintf("%s_%x", prefix, suffix[:])
	if len(group) > maxGroupLen {
		return "", fmt.Errorf("group name '%s' cannot be longer than 63 characters: %w", group, ErrInvalidInput)
	}

	return group, nil
}
// validIdentifier reports whether s matches "^[a-zA-Z_][0-9a-zA-Z_]*$",
// without going through the regexp package.
//
// Trace event groups, names and kernel symbols must adhere to this set
// of characters: the string is non-empty, the first character is not a
// digit, and every character is an ASCII letter, digit or underscore.
func validIdentifier(s string) bool {
	if s == "" {
		return false
	}

	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]

		isLetter := ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')
		// Digits are accepted everywhere except in the leading position.
		isDigit := pos > 0 && '0' <= ch && ch <= '9'

		if isLetter || isDigit || ch == '_' {
			continue
		}
		return false
	}

	return true
}
// sanitizeTracefsPath joins the given path elements below the tracefs mount
// point and returns the cleaned result.
//
// An error wrapping ErrInvalidInput is returned when the joined path does not
// stay inside the mount point, e.g. because it contains "..". Errors from
// locating the mount point are returned unchanged.
func sanitizeTracefsPath(path ...string) (string, error) {
	base, err := getTracefsPath()
	if err != nil {
		return "", err
	}

	l := filepath.Join(path...)
	p := filepath.Join(base, l)

	// A bare prefix comparison would accept siblings whose name merely starts
	// with the name of the mount point (".../tracing_other"). Require either
	// the mount point itself or a path separator right after it.
	root := filepath.Clean(base)
	if p != root && !strings.HasPrefix(p, root+string(filepath.Separator)) {
		return "", fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, ErrInvalidInput)
	}

	return p, nil
}
// getTracefsPath returns the path of a usable tracefs mount point.
//
// Since kernel 4.1 tracefs should be mounted by default at /sys/kernel/tracing,
// but it may also be available at /sys/kernel/debug/tracing if debugfs is mounted.
// Which of these paths is available depends on distribution choices, so each
// candidate is probed in order and the first one with the expected filesystem
// type wins. The lookup is wrapped in internal.Memoize.
var getTracefsPath = internal.Memoize(func() (string, error) {
	type mount struct {
		path   string
		fsType int64
	}

	candidates := []mount{
		{"/sys/kernel/tracing", unix.TRACEFS_MAGIC},
		{"/sys/kernel/debug/tracing", unix.TRACEFS_MAGIC},
		// RHEL/CentOS
		{"/sys/kernel/debug/tracing", unix.DEBUGFS_MAGIC},
	}

	for i := range candidates {
		m := candidates[i]

		got, err := internal.FSType(m.path)
		if err != nil || got != m.fsType {
			continue
		}
		return m.path, nil
	}

	return "", errors.New("neither debugfs nor tracefs are mounted")
})
// sanitizeIdentifier collapses every run of characters that is not an ASCII
// letter or digit into a single underscore, making s usable with the tracefs api.
//
// It is equivalent to calling regexp.MustCompile("[^a-zA-Z0-9]+").ReplaceAllString("_").
func sanitizeIdentifier(s string) string {
	var out strings.Builder
	out.Grow(len(s))

	// inRun is true while consecutive invalid characters are being skipped;
	// only the first character of such a run emits an underscore.
	inRun := false
	for _, r := range s {
		alnum := (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9')
		if alnum {
			inRun = false
			out.WriteRune(r)
			continue
		}
		if !inRun {
			inRun = true
			out.WriteByte('_')
		}
	}

	return out.String()
}
// EventID reads a trace event's ID from tracefs given its group and name.
//
// The kernel requires group and name to be alphanumeric or underscore, so both
// are validated before the "events/<group>/<name>/id" file is read. An error
// matching os.ErrNotExist is returned unwrapped when that file is missing.
func EventID(group, name string) (uint64, error) {
	switch {
	case !validIdentifier(group):
		return 0, fmt.Errorf("invalid tracefs group: %q", group)
	case !validIdentifier(name):
		return 0, fmt.Errorf("invalid tracefs name: %q", name)
	}

	idPath, err := sanitizeTracefsPath("events", group, name, "id")
	if err != nil {
		return 0, err
	}

	id, err := internal.ReadUint64FromFile("%d\n", idPath)
	switch {
	case err == nil:
		return id, nil
	case errors.Is(err, os.ErrNotExist):
		// Returned as-is so callers can detect a missing event.
		return 0, err
	default:
		return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err)
	}
}
// probePrefix returns the leading token of a probe definition: "p" for an
// entry probe, "r" for a return probe, or "r<maxActive>" for a return probe
// with a positive maxActive.
func probePrefix(ret bool, maxActive int) string {
	switch {
	case !ret:
		return "p"
	case maxActive > 0:
		return fmt.Sprintf("r%d", maxActive)
	default:
		return "r"
	}
}
// Event represents an entry in a tracefs probe events file.
//
// Events are created by NewEvent and removed from tracefs by Close.
type Event struct {
// typ selects the events file the entry lives in.
typ ProbeType
// group and name identify the entry as "<group>/<name>".
group, name string
// event id allocated by the kernel. 0 if the event has already been removed.
id uint64
}
// NewEvent creates a new ephemeral trace event.
//
// The event name is derived from args.Symbol via sanitizeIdentifier. The
// function first checks that no event with the same group and name exists,
// then writes the probe definition to the events file of args.Type and
// finally reads back the ID of the new event. The returned Event carries a
// finalizer that calls Close.
//
// Returns os.ErrNotExist if symbol is not a valid
// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist
// if a probe with the same group and symbol already exists. Returns an error if
// args.RetprobeMaxActive is used on non kprobe types. Returns ErrNotSupported if
// the kernel is too old to support kretprobe maxactive.
func NewEvent(args ProbeArgs) (*Event, error) {
// Before attempting to create a trace event through tracefs,
// check if an event with the same group and name already exists.
// Kernels 4.x and earlier don't return os.ErrExist on writing a duplicate
// entry, so we need to rely on reads for detecting uniqueness.
eventName := sanitizeIdentifier(args.Symbol)
_, err := EventID(args.Group, eventName)
if err == nil {
return nil, fmt.Errorf("trace event %s/%s: %w", args.Group, eventName, os.ErrExist)
}
// Any failure other than "event does not exist" means the check itself failed.
if err != nil && !errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("checking trace event %s/%s: %w", args.Group, eventName, err)
}
// Open the kprobe_events file in tracefs.
f, err := args.Type.eventsFile()
if err != nil {
return nil, err
}
defer f.Close()
// pe is the full probe definition written to the events file, token is the
// symbol/path part of it and is reused in error messages.
var pe, token string
switch args.Type {
case Kprobe:
// The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt):
// p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
// r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
// -:[GRP/]EVENT : Clear a probe
//
// Some examples:
// r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy
// p:ebpf_5678/p_my_kprobe __x64_sys_execve
//
// Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the
// kernel default to NR_CPUS. This is desired in most eBPF cases since
// subsampling or rate limiting logic can be more accurately implemented in
// the eBPF program itself.
// See Documentation/kprobes.txt for more details.
if args.RetprobeMaxActive != 0 && !args.Ret {
return nil, ErrInvalidMaxActive
}
token = KprobeToken(args)
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.Ret, args.RetprobeMaxActive), args.Group, eventName, token)
case Uprobe:
// The uprobe_events syntax is as follows:
// p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe
// r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return probe
// -:[GRP/]EVENT : Clear a probe
//
// Some examples:
// r:ebpf_1234/readline /bin/bash:0x12345
// p:ebpf_5678/main_mySymbol /bin/mybin:0x12345(0x123)
//
// See Documentation/trace/uprobetracer.txt for more details.
if args.RetprobeMaxActive != 0 {
return nil, ErrInvalidMaxActive
}
token = UprobeToken(args)
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.Ret, 0), args.Group, eventName, token)
}
_, err = f.WriteString(pe)
// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
// when trying to create a retprobe for a missing symbol.
if errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("token %s: not found: %w", token, err)
}
// Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved
// to an invalid insn boundary. The exact conditions that trigger this error are
// arch specific however.
if errors.Is(err, syscall.EILSEQ) {
return nil, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)
}
// ERANGE is returned when the `SYM[+offs]` token is too big and cannot
// be resolved.
if errors.Is(err, syscall.ERANGE) {
return nil, fmt.Errorf("token %s: offset too big: %w", token, os.ErrNotExist)
}
if err != nil {
return nil, fmt.Errorf("token %s: writing '%s': %w", token, pe, err)
}
// Get the newly-created trace event's id.
tid, err := EventID(args.Group, eventName)
if args.RetprobeMaxActive != 0 && errors.Is(err, os.ErrNotExist) {
// Kernels < 4.12 don't support maxactive and therefore auto generate
// group and event names from the symbol and offset. The symbol is used
// without any sanitization.
// See https://elixir.bootlin.com/linux/v4.10/source/kernel/trace/trace_kprobe.c#L712
event := fmt.Sprintf("kprobes/r_%s_%d", args.Symbol, args.Offset)
if err := removeEvent(args.Type, event); err != nil {
return nil, fmt.Errorf("failed to remove spurious maxactive event: %s", err)
}
return nil, fmt.Errorf("create trace event with non-default maxactive: %w", internal.ErrNotSupported)
}
if err != nil {
return nil, fmt.Errorf("get trace event id: %w", err)
}
evt := &Event{args.Type, args.Group, eventName, tid}
// Remove the event from tracefs if the Event is garbage collected without
// an explicit Close.
runtime.SetFinalizer(evt, (*Event).Close)
return evt, nil
}
// Close removes the event from tracefs and clears its finalizer.
//
// Returns os.ErrClosed if the event has already been closed before.
func (evt *Event) Close() error {
	if evt.id == 0 {
		return os.ErrClosed
	}

	// Mark the event as closed before touching tracefs, so that a second
	// Close reports os.ErrClosed even if the removal below fails.
	evt.id = 0
	runtime.SetFinalizer(evt, nil)

	return removeEvent(evt.typ, evt.group+"/"+evt.name)
}
// removeEvent deletes the probe event pe, given as "<group>/<name>", by
// writing a "-:" entry to the events file of typ.
func removeEvent(typ ProbeType, pe string) error {
	events, err := typ.eventsFile()
	if err != nil {
		return err
	}
	defer events.Close()

	// See [k,u]probe_events syntax above. The probe type does not need to be specified
	// for removals.
	_, err = events.WriteString("-:" + pe)
	if err == nil {
		return nil
	}
	return fmt.Errorf("remove event %q from %s: %w", pe, events.Name(), err)
}
// ID returns the tracefs ID associated with the event.
//
// The ID is 0 once the event has been closed.
func (evt *Event) ID() uint64 {
return evt.id
}
// Group returns the tracefs group used by the event, as passed to NewEvent
// in ProbeArgs.Group.
func (evt *Event) Group() string {
return evt.group
}
// KprobeToken creates the SYM[+offs] token for the tracefs api.
//
// The offset is rendered as "+0x..." and left out entirely when it is zero.
func KprobeToken(args ProbeArgs) string {
	if args.Offset == 0 {
		return args.Symbol
	}
	return args.Symbol + fmt.Sprintf("+%#x", args.Offset)
}
@@ -0,0 +1,79 @@
package tracefs
import (
"fmt"
"os"
"testing"
qt "github.com/frankban/quicktest"
)
// ksym is the kernel symbol probed by the kprobe test cases.
// Global symbol, present on all tested kernels.
const ksym = "vprintk"
// TestKprobeTraceFSGroup checks the shape of names produced by RandomGroup
// and that over-long or malformed prefixes are rejected.
func TestKprobeTraceFSGroup(t *testing.T) {
	c := qt.New(t)

	// Expect <prefix>_<16 random hex chars>.
	group, err := RandomGroup("ebpftest")
	c.Assert(err, qt.IsNil)
	c.Assert(group, qt.Matches, `ebpftest_[a-f0-9]{16}`)

	// Expect error when the generator's output exceeds 63 characters.
	// 63 - 17 (length of the random suffix and underscore) + 1
	const tooLong = 47
	prefix := make([]byte, tooLong)
	for i := 0; i < tooLong; i++ {
		prefix[i] = 'a'
	}
	_, err = RandomGroup(string(prefix))
	c.Assert(err, qt.Not(qt.IsNil))

	// Reject non-alphanumeric characters.
	_, err = RandomGroup("/")
	c.Assert(err, qt.Not(qt.IsNil))
}
// TestKprobeToken checks the SYM[+offs] rendering for a handful of offsets.
func TestKprobeToken(t *testing.T) {
	type tokenCase struct {
		args     ProbeArgs
		expected string
	}

	cases := []tokenCase{
		{args: ProbeArgs{Symbol: "symbol"}, expected: "symbol"},
		{args: ProbeArgs{Symbol: "symbol", Offset: 1}, expected: "symbol+0x1"},
		{args: ProbeArgs{Symbol: "symbol", Offset: 65535}, expected: "symbol+0xffff"},
		{args: ProbeArgs{Symbol: "symbol", Offset: 65536}, expected: "symbol+0x10000"},
	}

	for idx := range cases {
		tc := cases[idx]
		t.Run(fmt.Sprint(idx), func(t *testing.T) {
			got := KprobeToken(tc.args)
			if got == tc.expected {
				return
			}
			t.Errorf("Expected symbol+offset to be '%s', got '%s'", tc.expected, got)
		})
	}
}
// TestNewEvent creates entry and return probes of both types, verifies that a
// duplicate creation reports os.ErrExist and that closing twice reports
// os.ErrClosed.
func TestNewEvent(t *testing.T) {
	probes := []ProbeArgs{
		{Type: Kprobe, Symbol: ksym},
		{Type: Kprobe, Symbol: ksym, Ret: true},
		{Type: Uprobe, Path: "/bin/bash", Symbol: "main"},
		{Type: Uprobe, Path: "/bin/bash", Symbol: "main", Ret: true},
	}

	for _, args := range probes {
		t.Run(fmt.Sprintf("%s ret=%v", args.Type, args.Ret), func(t *testing.T) {
			args.Group, _ = RandomGroup("ebpftest")

			evt, err := NewEvent(args)
			qt.Assert(t, err, qt.IsNil)
			defer evt.Close()

			// Same group and symbol again must be reported as a duplicate.
			_, err = NewEvent(args)
			qt.Assert(t, err, qt.ErrorIs, os.ErrExist,
				qt.Commentf("expected consecutive event creation to contain os.ErrExist"))

			first := evt.Close()
			qt.Assert(t, first, qt.IsNil)

			second := evt.Close()
			qt.Assert(t, second, qt.ErrorIs, os.ErrClosed)
		})
	}
}
@@ -0,0 +1,86 @@
package tracefs
import (
"errors"
"fmt"
"testing"
qt "github.com/frankban/quicktest"
)
// TestEventID reads the ID of a tracepoint and checks that it is non-zero.
func TestEventID(t *testing.T) {
	c := qt.New(t)

	eid, err := EventID("syscalls", "sys_enter_mmap")
	c.Assert(err, qt.IsNil)
	// qt.Equals compares with ==, which is never true for values of different
	// types. The expected value must therefore be a uint64 like eid, otherwise
	// the negated check passes even for an ID of zero.
	c.Assert(eid, qt.Not(qt.Equals), uint64(0))
}
// TestSanitizePath checks that a path leaving the tracefs mount point is
// rejected with ErrInvalidInput while a relative path inside it is accepted.
func TestSanitizePath(t *testing.T) {
	if _, err := sanitizeTracefsPath("../escaped"); !errors.Is(err, ErrInvalidInput) {
		t.Errorf("expected error %s, got: %s", ErrInvalidInput, err)
	}

	if _, err := sanitizeTracefsPath("./not/escaped"); err != nil {
		t.Errorf("expected no error, got: %s", err)
	}
}
// TestValidIdentifier runs validIdentifier against accepted and rejected
// identifiers.
func TestValidIdentifier(t *testing.T) {
	type idCase struct {
		name string
		in   string
		fail bool // true when in must be rejected
	}

	cases := []idCase{
		{name: "empty string", in: "", fail: true},
		{name: "leading number", in: "1test", fail: true},
		{name: "underscore first", in: "__x64_syscall", fail: false},
		{name: "contains number", in: "bpf_trace_run1", fail: false},
		{name: "underscore", in: "_", fail: false},
		{name: "contains dash", in: "-EINVAL", fail: true},
		{name: "contains number", in: "all0wed", fail: false},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			verdict := "pass"
			if tc.fail {
				verdict = "fail"
			}

			wantValid := !tc.fail
			if validIdentifier(tc.in) != wantValid {
				t.Errorf("expected string '%s' to %s valid ID check", tc.in, verdict)
			}
		})
	}
}
// TestSanitizeIdentifier checks that runs of non-alphanumeric characters are
// collapsed into single underscores.
func TestSanitizeIdentifier(t *testing.T) {
	type symCase struct {
		symbol   string
		expected string
	}

	cases := []symCase{
		{symbol: "readline", expected: "readline"},
		{symbol: "main.Func123", expected: "main_Func123"},
		{symbol: "a.....a", expected: "a_a"},
		{symbol: "./;'{}[]a", expected: "_a"},
		{symbol: "***xx**xx###", expected: "_xx_xx_"},
		{symbol: `@P#r$i%v^3*+t)i&k++--`, expected: "_P_r_i_v_3_t_i_k_"},
	}

	for idx := range cases {
		tc := cases[idx]
		t.Run(fmt.Sprint(idx), func(t *testing.T) {
			got := sanitizeIdentifier(tc.symbol)
			if got == tc.expected {
				return
			}
			t.Errorf("Expected sanitized symbol to be '%s', got '%s'", tc.expected, got)
		})
	}
}
// TestGetTracefsPath asserts that a tracefs mount point is found without
// error. The returned path itself is not inspected.
func TestGetTracefsPath(t *testing.T) {
_, err := getTracefsPath()
qt.Assert(t, err, qt.IsNil)
}
@@ -0,0 +1,24 @@
// Code generated by "stringer -type=ProbeType -linecomment"; DO NOT EDIT.
package tracefs
import "strconv"
// _ is never called. It only exists so that the build fails when the values
// of the ProbeType constants no longer match the generated tables below.
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[Kprobe-0]
_ = x[Uprobe-1]
}
// _ProbeType_name is the concatenation of all ProbeType names.
const _ProbeType_name = "kprobeuprobe"
// _ProbeType_index holds the start offset of each name in _ProbeType_name,
// followed by the total length.
var _ProbeType_index = [...]uint8{0, 6, 12}
// String returns the name of i ("kprobe" or "uprobe"), or "ProbeType(N)" for
// a value outside the known range.
func (i ProbeType) String() string {
if i >= ProbeType(len(_ProbeType_index)-1) {
return "ProbeType(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _ProbeType_name[_ProbeType_index[i]:_ProbeType_index[i+1]]
}
@@ -0,0 +1,16 @@
package tracefs
import "fmt"
// UprobeToken creates the PATH:OFFSET(REF_CTR_OFFSET) token for the tracefs api.
//
// The offset is always present, rendered as "0x...". The reference counter
// offset is only appended, in parentheses, when it is non-zero.
func UprobeToken(args ProbeArgs) string {
	token := fmt.Sprintf("%s:%#x", args.Path, args.Offset)
	if args.RefCtrOffset == 0 {
		return token
	}

	// This is not documented in Documentation/trace/uprobetracer.txt.
	// elixir.bootlin.com/linux/v5.15-rc7/source/kernel/trace/trace.c#L5564
	return token + fmt.Sprintf("(%#x)", args.RefCtrOffset)
}
@@ -0,0 +1,29 @@
package tracefs
import (
"fmt"
"testing"
)
// TestUprobeToken checks the PATH:OFFSET(REF_CTR_OFFSET) rendering with and
// without a reference counter offset.
func TestUprobeToken(t *testing.T) {
	type tokenCase struct {
		args     ProbeArgs
		expected string
	}

	cases := []tokenCase{
		{args: ProbeArgs{Path: "/bin/bash"}, expected: "/bin/bash:0x0"},
		{args: ProbeArgs{Path: "/bin/bash", Offset: 1}, expected: "/bin/bash:0x1"},
		{args: ProbeArgs{Path: "/bin/bash", Offset: 65535}, expected: "/bin/bash:0xffff"},
		{args: ProbeArgs{Path: "/bin/bash", Offset: 65536}, expected: "/bin/bash:0x10000"},
		{args: ProbeArgs{Path: "/bin/bash", Offset: 1, RefCtrOffset: 1}, expected: "/bin/bash:0x1(0x1)"},
		{args: ProbeArgs{Path: "/bin/bash", Offset: 1, RefCtrOffset: 65535}, expected: "/bin/bash:0x1(0xffff)"},
	}

	for idx := range cases {
		tc := cases[idx]
		t.Run(fmt.Sprint(idx), func(t *testing.T) {
			got := UprobeToken(tc.args)
			if got == tc.expected {
				return
			}
			t.Errorf("Expected path:offset to be '%s', got '%s'", tc.expected, got)
		})
	}
}
@@ -0,0 +1,11 @@
// Package unix re-exports Linux specific parts of golang.org/x/sys/unix.
//
// It avoids breaking compilation on other OS by providing stubs as follows:
// - Invoking a function always returns an error.
// - Errnos have distinct, non-zero values.
// - Constants have distinct but meaningless values.
// - Types use the same names for members, but may or may not follow the
// Linux layout.
package unix
// Note: please don't add any custom API to this package. Use internal/sys instead.
@@ -0,0 +1,202 @@
//go:build linux
package unix
import (
"syscall"
linux "golang.org/x/sys/unix"
)
// Error numbers (and EPOLLIN) re-exported unchanged from golang.org/x/sys/unix.
const (
ENOENT = linux.ENOENT
EEXIST = linux.EEXIST
EAGAIN = linux.EAGAIN
ENOSPC = linux.ENOSPC
EINVAL = linux.EINVAL
EPOLLIN = linux.EPOLLIN
EINTR = linux.EINTR
EPERM = linux.EPERM
ESRCH = linux.ESRCH
ENODEV = linux.ENODEV
EBADF = linux.EBADF
E2BIG = linux.E2BIG
EFAULT = linux.EFAULT
EACCES = linux.EACCES
EILSEQ = linux.EILSEQ
EOPNOTSUPP = linux.EOPNOTSUPP
)
// Flags, sizes, syscall numbers and magic values re-exported unchanged from
// golang.org/x/sys/unix. The non-Linux build of this package declares the same
// names with placeholder values.
const (
BPF_F_NO_PREALLOC = linux.BPF_F_NO_PREALLOC
BPF_F_NUMA_NODE = linux.BPF_F_NUMA_NODE
BPF_F_RDONLY = linux.BPF_F_RDONLY
BPF_F_WRONLY = linux.BPF_F_WRONLY
BPF_F_RDONLY_PROG = linux.BPF_F_RDONLY_PROG
BPF_F_WRONLY_PROG = linux.BPF_F_WRONLY_PROG
BPF_F_SLEEPABLE = linux.BPF_F_SLEEPABLE
BPF_F_XDP_HAS_FRAGS = linux.BPF_F_XDP_HAS_FRAGS
BPF_F_MMAPABLE = linux.BPF_F_MMAPABLE
BPF_F_INNER_MAP = linux.BPF_F_INNER_MAP
BPF_F_KPROBE_MULTI_RETURN = linux.BPF_F_KPROBE_MULTI_RETURN
BPF_OBJ_NAME_LEN = linux.BPF_OBJ_NAME_LEN
BPF_TAG_SIZE = linux.BPF_TAG_SIZE
BPF_RINGBUF_BUSY_BIT = linux.BPF_RINGBUF_BUSY_BIT
BPF_RINGBUF_DISCARD_BIT = linux.BPF_RINGBUF_DISCARD_BIT
BPF_RINGBUF_HDR_SZ = linux.BPF_RINGBUF_HDR_SZ
SYS_BPF = linux.SYS_BPF
F_DUPFD_CLOEXEC = linux.F_DUPFD_CLOEXEC
EPOLL_CTL_ADD = linux.EPOLL_CTL_ADD
EPOLL_CLOEXEC = linux.EPOLL_CLOEXEC
O_CLOEXEC = linux.O_CLOEXEC
O_NONBLOCK = linux.O_NONBLOCK
PROT_NONE = linux.PROT_NONE
PROT_READ = linux.PROT_READ
PROT_WRITE = linux.PROT_WRITE
MAP_ANON = linux.MAP_ANON
MAP_SHARED = linux.MAP_SHARED
MAP_PRIVATE = linux.MAP_PRIVATE
PERF_ATTR_SIZE_VER1 = linux.PERF_ATTR_SIZE_VER1
PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE
PERF_TYPE_TRACEPOINT = linux.PERF_TYPE_TRACEPOINT
PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT
PERF_EVENT_IOC_DISABLE = linux.PERF_EVENT_IOC_DISABLE
PERF_EVENT_IOC_ENABLE = linux.PERF_EVENT_IOC_ENABLE
PERF_EVENT_IOC_SET_BPF = linux.PERF_EVENT_IOC_SET_BPF
PerfBitWatermark = linux.PerfBitWatermark
PerfBitWriteBackward = linux.PerfBitWriteBackward
PERF_SAMPLE_RAW = linux.PERF_SAMPLE_RAW
PERF_FLAG_FD_CLOEXEC = linux.PERF_FLAG_FD_CLOEXEC
RLIM_INFINITY = linux.RLIM_INFINITY
RLIMIT_MEMLOCK = linux.RLIMIT_MEMLOCK
BPF_STATS_RUN_TIME = linux.BPF_STATS_RUN_TIME
PERF_RECORD_LOST = linux.PERF_RECORD_LOST
PERF_RECORD_SAMPLE = linux.PERF_RECORD_SAMPLE
AT_FDCWD = linux.AT_FDCWD
RENAME_NOREPLACE = linux.RENAME_NOREPLACE
SO_ATTACH_BPF = linux.SO_ATTACH_BPF
SO_DETACH_BPF = linux.SO_DETACH_BPF
SOL_SOCKET = linux.SOL_SOCKET
SIGPROF = linux.SIGPROF
SIG_BLOCK = linux.SIG_BLOCK
SIG_UNBLOCK = linux.SIG_UNBLOCK
EM_NONE = linux.EM_NONE
EM_BPF = linux.EM_BPF
BPF_FS_MAGIC = linux.BPF_FS_MAGIC
TRACEFS_MAGIC = linux.TRACEFS_MAGIC
DEBUGFS_MAGIC = linux.DEBUGFS_MAGIC
)
// The types below are aliases of their golang.org/x/sys/unix counterparts, so
// values can be passed to that package without conversion.
type Statfs_t = linux.Statfs_t
type Stat_t = linux.Stat_t
type Rlimit = linux.Rlimit
type Signal = linux.Signal
type Sigset_t = linux.Sigset_t
type PerfEventMmapPage = linux.PerfEventMmapPage
type EpollEvent = linux.EpollEvent
type PerfEventAttr = linux.PerfEventAttr
type Utsname = linux.Utsname
// Every function below forwards its arguments unchanged to the function of the
// same name in golang.org/x/sys/unix and returns its results.

// Syscall wraps linux.Syscall.
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
return linux.Syscall(trap, a1, a2, a3)
}
// PthreadSigmask wraps linux.PthreadSigmask.
func PthreadSigmask(how int, set, oldset *Sigset_t) error {
return linux.PthreadSigmask(how, set, oldset)
}
// FcntlInt wraps linux.FcntlInt.
func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
return linux.FcntlInt(fd, cmd, arg)
}
// IoctlSetInt wraps linux.IoctlSetInt.
func IoctlSetInt(fd int, req uint, value int) error {
return linux.IoctlSetInt(fd, req, value)
}
// Statfs wraps linux.Statfs.
func Statfs(path string, buf *Statfs_t) (err error) {
return linux.Statfs(path, buf)
}
// Close wraps linux.Close.
func Close(fd int) (err error) {
return linux.Close(fd)
}
// EpollWait wraps linux.EpollWait.
func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
return linux.EpollWait(epfd, events, msec)
}
// EpollCtl wraps linux.EpollCtl.
func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
return linux.EpollCtl(epfd, op, fd, event)
}
// Eventfd wraps linux.Eventfd.
func Eventfd(initval uint, flags int) (fd int, err error) {
return linux.Eventfd(initval, flags)
}
// Write wraps linux.Write.
func Write(fd int, p []byte) (n int, err error) {
return linux.Write(fd, p)
}
// EpollCreate1 wraps linux.EpollCreate1.
func EpollCreate1(flag int) (fd int, err error) {
return linux.EpollCreate1(flag)
}
// SetNonblock wraps linux.SetNonblock.
func SetNonblock(fd int, nonblocking bool) (err error) {
return linux.SetNonblock(fd, nonblocking)
}
// Mmap wraps linux.Mmap.
func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
return linux.Mmap(fd, offset, length, prot, flags)
}
// Munmap wraps linux.Munmap.
func Munmap(b []byte) (err error) {
return linux.Munmap(b)
}
// PerfEventOpen wraps linux.PerfEventOpen.
func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
return linux.PerfEventOpen(attr, pid, cpu, groupFd, flags)
}
// Uname wraps linux.Uname.
func Uname(buf *Utsname) (err error) {
return linux.Uname(buf)
}
// Getpid wraps linux.Getpid.
func Getpid() int {
return linux.Getpid()
}
// Gettid wraps linux.Gettid.
func Gettid() int {
return linux.Gettid()
}
// Tgkill wraps linux.Tgkill.
func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) {
return linux.Tgkill(tgid, tid, sig)
}
// BytePtrFromString wraps linux.BytePtrFromString.
func BytePtrFromString(s string) (*byte, error) {
return linux.BytePtrFromString(s)
}
// ByteSliceToString wraps linux.ByteSliceToString.
func ByteSliceToString(s []byte) string {
return linux.ByteSliceToString(s)
}
// Renameat2 wraps linux.Renameat2.
func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error {
return linux.Renameat2(olddirfd, oldpath, newdirfd, newpath, flags)
}
// Prlimit wraps linux.Prlimit.
func Prlimit(pid, resource int, new, old *Rlimit) error {
return linux.Prlimit(pid, resource, new, old)
}
// Open wraps linux.Open.
func Open(path string, mode int, perm uint32) (int, error) {
return linux.Open(path, mode, perm)
}
// Fstat wraps linux.Fstat.
func Fstat(fd int, stat *Stat_t) error {
return linux.Fstat(fd, stat)
}
// SetsockoptInt wraps linux.SetsockoptInt.
func SetsockoptInt(fd, level, opt, value int) error {
return linux.SetsockoptInt(fd, level, opt, value)
}
@@ -0,0 +1,294 @@
//go:build !linux
package unix
import (
"fmt"
"runtime"
"syscall"
)
// errNonLinux is the error returned by most stubs in this file. It names the
// GOOS/GOARCH pair the binary was built for.
var errNonLinux = fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
// Errnos are distinct and non-zero.
//
// The values are consecutive placeholders starting at 1 and are not the Linux
// errno numbers.
const (
ENOENT syscall.Errno = iota + 1
EEXIST
EAGAIN
ENOSPC
EINVAL
EINTR
EPERM
ESRCH
ENODEV
EBADF
E2BIG
EFAULT
EACCES
EILSEQ
EOPNOTSUPP
)
// Constants are distinct to avoid breaking switch statements.
//
// The values are consecutive placeholders starting at 0 and carry no meaning;
// they only allow code referring to these names to compile.
const (
BPF_F_NO_PREALLOC = iota
BPF_F_NUMA_NODE
BPF_F_RDONLY
BPF_F_WRONLY
BPF_F_RDONLY_PROG
BPF_F_WRONLY_PROG
BPF_F_SLEEPABLE
BPF_F_MMAPABLE
BPF_F_INNER_MAP
BPF_F_KPROBE_MULTI_RETURN
BPF_F_XDP_HAS_FRAGS
BPF_OBJ_NAME_LEN
BPF_TAG_SIZE
BPF_RINGBUF_BUSY_BIT
BPF_RINGBUF_DISCARD_BIT
BPF_RINGBUF_HDR_SZ
SYS_BPF
F_DUPFD_CLOEXEC
EPOLLIN
EPOLL_CTL_ADD
EPOLL_CLOEXEC
O_CLOEXEC
O_NONBLOCK
PROT_NONE
PROT_READ
PROT_WRITE
MAP_ANON
MAP_SHARED
MAP_PRIVATE
PERF_ATTR_SIZE_VER1
PERF_TYPE_SOFTWARE
PERF_TYPE_TRACEPOINT
PERF_COUNT_SW_BPF_OUTPUT
PERF_EVENT_IOC_DISABLE
PERF_EVENT_IOC_ENABLE
PERF_EVENT_IOC_SET_BPF
PerfBitWatermark
PerfBitWriteBackward
PERF_SAMPLE_RAW
PERF_FLAG_FD_CLOEXEC
RLIM_INFINITY
RLIMIT_MEMLOCK
BPF_STATS_RUN_TIME
PERF_RECORD_LOST
PERF_RECORD_SAMPLE
AT_FDCWD
RENAME_NOREPLACE
SO_ATTACH_BPF
SO_DETACH_BPF
SOL_SOCKET
SIGPROF
SIG_BLOCK
SIG_UNBLOCK
EM_NONE
EM_BPF
BPF_FS_MAGIC
TRACEFS_MAGIC
DEBUGFS_MAGIC
)
// Statfs_t is the non-Linux stand-in for the type of the same name in the
// Linux build. NOTE(review): the field layout is presumably modelled on the
// Linux struct; the stubs in this file never fill it in.
type Statfs_t struct {
Type int64
Bsize int64
Blocks uint64
Bfree uint64
Bavail uint64
Files uint64
Ffree uint64
Fsid [2]int32
Namelen int64
Frsize int64
Flags int64
Spare [4]int64
}
// Stat_t is the non-Linux stand-in for the type of the same name in the Linux
// build.
type Stat_t struct {
Dev uint64
Ino uint64
Nlink uint64
Mode uint32
Uid uint32
Gid uint32
_ int32
Rdev uint64
Size int64
Blksize int64
Blocks int64
}
// Rlimit is the non-Linux stand-in for the type of the same name in the Linux
// build.
type Rlimit struct {
Cur uint64
Max uint64
}
// Signal is the non-Linux stand-in for the type of the same name in the Linux
// build.
type Signal int
// Sigset_t is the non-Linux stand-in for the type of the same name in the
// Linux build.
type Sigset_t struct {
Val [4]uint64
}
// The functions below are stubs for platforms other than Linux. They ignore
// their arguments and report failure, in most cases by returning errNonLinux.
// The struct types are stand-ins for the types of the same name in the Linux
// build.

// Syscall is a stub that always fails with syscall.ENOTSUP.
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
return 0, 0, syscall.ENOTSUP
}
// PthreadSigmask is a stub that always returns errNonLinux.
func PthreadSigmask(how int, set, oldset *Sigset_t) error {
return errNonLinux
}
// FcntlInt is a stub that always returns -1 and errNonLinux.
func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
return -1, errNonLinux
}
// IoctlSetInt is a stub that always returns errNonLinux.
func IoctlSetInt(fd int, req uint, value int) error {
return errNonLinux
}
// Statfs is a stub that always returns errNonLinux; buf is left untouched.
func Statfs(path string, buf *Statfs_t) error {
return errNonLinux
}
// Close is a stub that always returns errNonLinux.
func Close(fd int) (err error) {
return errNonLinux
}
// EpollEvent is the non-Linux stand-in for the epoll event type.
type EpollEvent struct {
Events uint32
Fd int32
Pad int32
}
// EpollWait is a stub that always returns 0 and errNonLinux.
func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
return 0, errNonLinux
}
// EpollCtl is a stub that always returns errNonLinux.
func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
return errNonLinux
}
// Eventfd is a stub that always returns 0 and errNonLinux.
func Eventfd(initval uint, flags int) (fd int, err error) {
return 0, errNonLinux
}
// Write is a stub that always returns 0 and errNonLinux.
func Write(fd int, p []byte) (n int, err error) {
return 0, errNonLinux
}
// EpollCreate1 is a stub that always returns 0 and errNonLinux.
func EpollCreate1(flag int) (fd int, err error) {
return 0, errNonLinux
}
// PerfEventMmapPage is the non-Linux stand-in for the perf event mmap page
// header.
type PerfEventMmapPage struct {
Version uint32
Compat_version uint32
Lock uint32
Index uint32
Offset int64
Time_enabled uint64
Time_running uint64
Capabilities uint64
Pmc_width uint16
Time_shift uint16
Time_mult uint32
Time_offset uint64
Time_zero uint64
Size uint32
Data_head uint64
Data_tail uint64
Data_offset uint64
Data_size uint64
Aux_head uint64
Aux_tail uint64
Aux_offset uint64
Aux_size uint64
}
// SetNonblock is a stub that always returns errNonLinux.
func SetNonblock(fd int, nonblocking bool) (err error) {
return errNonLinux
}
// Mmap is a stub that always returns an empty, non-nil slice and errNonLinux.
func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
return []byte{}, errNonLinux
}
// Munmap is a stub that always returns errNonLinux.
func Munmap(b []byte) (err error) {
return errNonLinux
}
// PerfEventAttr is the non-Linux stand-in for the perf event attribute type.
type PerfEventAttr struct {
Type uint32
Size uint32
Config uint64
Sample uint64
Sample_type uint64
Read_format uint64
Bits uint64
Wakeup uint32
Bp_type uint32
Ext1 uint64
Ext2 uint64
Branch_sample_type uint64
Sample_regs_user uint64
Sample_stack_user uint32
Clockid int32
Sample_regs_intr uint64
Aux_watermark uint32
Sample_max_stack uint16
}
// PerfEventOpen is a stub that always returns 0 and errNonLinux.
func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
return 0, errNonLinux
}
// Utsname is the non-Linux stand-in for the uname result; it only declares the
// Release and Version fields.
type Utsname struct {
Release [65]byte
Version [65]byte
}
// Uname is a stub that always returns errNonLinux; buf is left untouched.
func Uname(buf *Utsname) (err error) {
return errNonLinux
}
// Getpid is a stub that always returns -1.
func Getpid() int {
return -1
}
// Gettid is a stub that always returns -1.
func Gettid() int {
return -1
}
// Tgkill is a stub that always returns errNonLinux.
func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) {
return errNonLinux
}
// BytePtrFromString is a stub that always returns nil and errNonLinux.
func BytePtrFromString(s string) (*byte, error) {
return nil, errNonLinux
}
// ByteSliceToString is a stub that always returns the empty string,
// regardless of s.
func ByteSliceToString(s []byte) string {
return ""
}
// Renameat2 is a stub that always returns errNonLinux.
func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error {
return errNonLinux
}
// Prlimit is a stub that always returns errNonLinux.
func Prlimit(pid, resource int, new, old *Rlimit) error {
return errNonLinux
}
// Open is a stub that always returns -1 and errNonLinux.
func Open(path string, mode int, perm uint32) (int, error) {
return -1, errNonLinux
}
// Fstat is a stub that always returns errNonLinux; stat is left untouched.
func Fstat(fd int, stat *Stat_t) error {
return errNonLinux
}
// SetsockoptInt is a stub that always returns errNonLinux.
func SetsockoptInt(fd, level, opt, value int) error {
return errNonLinux
}
@@ -0,0 +1,153 @@
package internal
import (
"debug/elf"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"os"
"github.com/cilium/ebpf/internal/unix"
)
var (
// errAuxvNoVDSO is returned by vdsoMemoryAddress when the end of the
// auxiliary vector is reached without finding a vDSO entry.
errAuxvNoVDSO = errors.New("no vdso address found in auxv")
)
// vdsoVersion returns the LINUX_VERSION_CODE embedded in the vDSO library
// linked into the current process image.
//
// The address of the vDSO is taken from /proc/self/auxv and the ELF image is
// then read through /proc/self/mem.
func vdsoVersion() (uint32, error) {
	// Read data from the auxiliary vector, which is normally passed directly
	// to the process. Go does not expose that data, so we must read it from procfs.
	// https://man7.org/linux/man-pages/man3/getauxval.3.html
	auxv, err := os.Open("/proc/self/auxv")
	switch {
	case errors.Is(err, unix.EACCES):
		return 0, fmt.Errorf("opening auxv: %w (process may not be dumpable due to file capabilities)", err)
	case err != nil:
		return 0, fmt.Errorf("opening auxv: %w", err)
	}
	defer auxv.Close()

	addr, err := vdsoMemoryAddress(auxv)
	if err != nil {
		return 0, fmt.Errorf("finding vDSO memory address: %w", err)
	}

	// Use /proc/self/mem rather than unsafe.Pointer tricks.
	procMem, err := os.Open("/proc/self/mem")
	if err != nil {
		return 0, fmt.Errorf("opening mem: %w", err)
	}
	defer procMem.Close()

	// Open ELF at provided memory address, as offset into /proc/self/mem.
	image := io.NewSectionReader(procMem, int64(addr), math.MaxInt64)
	code, err := vdsoLinuxVersionCode(image)
	if err != nil {
		return 0, fmt.Errorf("reading linux version code: %w", err)
	}

	return code, nil
}
// vdsoMemoryAddress returns the memory address of the vDSO library
// linked into the current process image. r is an io.Reader into an auxv blob.
//
// The blob is decoded as a sequence of native-endian (tag, value) pairs of
// 64 bits each. errAuxvNoVDSO is returned when the terminating entry is
// reached before a vDSO entry was seen.
func vdsoMemoryAddress(r io.Reader) (uint64, error) {
	const (
		_AT_NULL         = 0  // End of vector
		_AT_SYSINFO_EHDR = 33 // Offset to vDSO blob in process image
	)

	// Loop through all tag/value pairs in auxv until we find `AT_SYSINFO_EHDR`,
	// the address of a page containing the virtual Dynamic Shared Object (vDSO).
	var entry struct{ Tag, Val uint64 }
	for {
		if err := binary.Read(r, NativeEndian, &entry); err != nil {
			return 0, fmt.Errorf("reading auxv entry: %w", err)
		}

		// _AT_NULL is always the last tag/val pair in the aux vector
		// and can be treated like EOF.
		if entry.Tag == _AT_NULL {
			return 0, errAuxvNoVDSO
		}

		if entry.Tag != _AT_SYSINFO_EHDR {
			continue
		}

		if entry.Val == 0 {
			return 0, fmt.Errorf("invalid vDSO address in auxv")
		}
		return entry.Val, nil
	}
}
// elfNoteHeader is the fixed-size header that precedes every entry of an ELF
// note section.
//
// format described at https://www.man7.org/linux/man-pages/man5/elf.5.html in section 'Notes (Nhdr)'
type elfNoteHeader struct {
// NameSize is the length in bytes of the name following the header.
NameSize int32
// DescSize is the length in bytes of the descriptor following the name.
DescSize int32
// Type is the note type.
Type int32
}
// vdsoLinuxVersionCode returns the LINUX_VERSION_CODE embedded in
// the ELF notes section of the binary provided by the reader.
//
// Every section of type SHT_NOTE is walked note by note. The first note named
// "Linux" with type 0 and a 4 byte descriptor provides the version code. An
// error is returned when the ELF cannot be parsed, when it has no note
// section, when a note is truncated, or when no matching note exists.
func vdsoLinuxVersionCode(r io.ReaderAt) (uint32, error) {
hdr, err := NewSafeELFFile(r)
if err != nil {
return 0, fmt.Errorf("reading vDSO ELF: %w", err)
}
sections := hdr.SectionsByType(elf.SHT_NOTE)
if len(sections) == 0 {
return 0, fmt.Errorf("no note section found in vDSO ELF")
}
for _, sec := range sections {
sr := sec.Open()
var n elfNoteHeader
// Read notes until we find one named 'Linux'.
for {
if err := binary.Read(sr, hdr.ByteOrder, &n); err != nil {
if errors.Is(err, io.EOF) {
// We looked at all the notes in this section
break
}
return 0, fmt.Errorf("reading note header: %w", err)
}
// If a note name is defined, it follows the note header.
var name string
if n.NameSize > 0 {
// Read the note name, aligned to 4 bytes.
buf := make([]byte, Align(n.NameSize, 4))
if err := binary.Read(sr, hdr.ByteOrder, &buf); err != nil {
return 0, fmt.Errorf("reading note name: %w", err)
}
// Read nul-terminated string.
name = unix.ByteSliceToString(buf[:n.NameSize])
}
// If a note descriptor is defined, it follows the name.
// It is possible for a note to have a descriptor but not a name.
if n.DescSize > 0 {
// LINUX_VERSION_CODE is a uint32 value.
if name == "Linux" && n.DescSize == 4 && n.Type == 0 {
var version uint32
if err := binary.Read(sr, hdr.ByteOrder, &version); err != nil {
return 0, fmt.Errorf("reading note descriptor: %w", err)
}
return version, nil
}
// Discard the note descriptor if it exists but we're not interested in it.
// The descriptor is padded to 4 bytes, so the padded size is skipped to
// land on the next note header.
if _, err := io.CopyN(io.Discard, sr, int64(Align(n.DescSize, 4))); err != nil {
return 0, err
}
}
}
}
return 0, fmt.Errorf("no Linux note in ELF")
}
@@ -0,0 +1,74 @@
package internal
import (
"errors"
"os"
"testing"
)
// TestAuxvVDSOMemoryAddress checks that vdsoMemoryAddress extracts the
// expected vDSO address from a captured auxiliary vector.
func TestAuxvVDSOMemoryAddress(t *testing.T) {
	const want = uint64(0x7ffd377e5000)

	f, err := os.Open("../testdata/auxv.bin")
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(func() { f.Close() })

	got, err := vdsoMemoryAddress(f)
	if err != nil {
		t.Fatal(err)
	}

	if got != want {
		t.Errorf("Expected vDSO memory address %x, got %x", want, got)
	}
}
// TestAuxvNoVDSO checks that errAuxvNoVDSO is reported for an auxiliary
// vector that carries no vDSO pointer.
func TestAuxvNoVDSO(t *testing.T) {
	// Copy of auxv.bin with the vDSO pointer removed.
	f, err := os.Open("../testdata/auxv_no_vdso.bin")
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(func() { f.Close() })

	if _, err := vdsoMemoryAddress(f); !errors.Is(err, errAuxvNoVDSO) {
		t.Fatalf("expected error '%v', got: %v", errAuxvNoVDSO, err)
	}
}
// TestLinuxVersionCodeEmbedded checks that the version code is recovered from
// the note sections of captured vDSO images.
func TestLinuxVersionCodeEmbedded(t *testing.T) {
	cases := []struct {
		file    string
		version uint32
	}{
		// 5.4.124
		{file: "../testdata/vdso.bin", version: 328828},
		// Container Optimized OS v85 with a 5.4.x kernel
		{file: "../testdata/vdso_multiple_notes.bin", version: 328875},
	}

	for _, tc := range cases {
		t.Run(tc.file, func(t *testing.T) {
			f, err := os.Open(tc.file)
			if err != nil {
				t.Fatal(err)
			}
			defer f.Close()

			got, err := vdsoLinuxVersionCode(f)
			if err != nil {
				t.Fatal(err)
			}

			if want := tc.version; got != want {
				t.Errorf("Expected version code %d, got %d", want, got)
			}
		})
	}
}
@@ -0,0 +1,106 @@
package internal
import (
"fmt"
"github.com/cilium/ebpf/internal/unix"
)
// MagicKernelVersion is the version constant used in ELF binaries indicating
// that the loader needs to substitute the eBPF program's version with the
// value of the kernel's KERNEL_VERSION compile-time macro. Used for
// compatibility with BCC, gobpf and RedSift.
const MagicKernelVersion = 0xFFFFFFFE
// Version is a version number in the form Major.Minor.Patch.
type Version [3]uint16

// NewVersion parses a string like "Major.Minor.Patch" into a Version.
//
// Patch is optional and is zero when it is absent or cannot be parsed. An
// error is returned when fewer than two numeric components can be read.
func NewVersion(ver string) (Version, error) {
	var v Version

	// The scan error is dropped on purpose: a missing patch component is
	// reported as an error too, so only the number of successfully parsed
	// fields decides whether the input is acceptable.
	parsed, _ := fmt.Sscanf(ver, "%d.%d.%d", &v[0], &v[1], &v[2])
	if parsed < 2 {
		return Version{}, fmt.Errorf("invalid version: %s", ver)
	}

	return v, nil
}

// NewVersionFromCode unpacks a LINUX_VERSION_CODE into a Version.
//
// Bits 16-23 become the major, bits 8-15 the minor and bits 0-7 the patch
// component. Higher bits are ignored.
func NewVersionFromCode(code uint32) Version {
	const byteMask = 0xff

	return Version{
		uint16((code >> 16) & byteMask),
		uint16((code >> 8) & byteMask),
		uint16(code & byteMask),
	}
}

// String formats the version as "vMajor.Minor.Patch", leaving out the patch
// component when it is zero.
func (v Version) String() string {
	major, minor, patch := v[0], v[1], v[2]
	if patch != 0 {
		return fmt.Sprintf("v%d.%d.%d", major, minor, patch)
	}
	return fmt.Sprintf("v%d.%d", major, minor)
}

// Less reports whether v sorts strictly before other, comparing major, minor
// and patch in that order.
func (v Version) Less(other Version) bool {
	for i := range v {
		switch {
		case v[i] < other[i]:
			return true
		case v[i] > other[i]:
			return false
		}
	}
	return false
}

// Unspecified reports whether all components of the version are zero.
func (v Version) Unspecified() bool {
	return v == (Version{})
}

// Kernel implements the kernel's KERNEL_VERSION macro from linux/version.h.
// It packs the kernel version and patch level into a single value.
func (v Version) Kernel() uint32 {
	// Major and minor are truncated to 8 bits so they cannot spill over into
	// each other when they overflow.
	major := uint32(v[0]) & 0xff
	minor := uint32(v[1]) & 0xff

	// Kernels 4.4 and 4.9 have their SUBLEVEL clamped to 255 to avoid
	// overflowing into PATCHLEVEL.
	// See kernel commit 9b82f13e7ef3 ("kbuild: clamp SUBLEVEL to 255").
	sublevel := uint32(v[2])
	if sublevel > 0xff {
		sublevel = 0xff
	}

	return major<<16 | minor<<8 | sublevel
}
// KernelVersion returns the version of the currently running kernel.
//
// It is detectKernelVersion wrapped by Memoize.
var KernelVersion = Memoize(detectKernelVersion)
// detectKernelVersion returns the version of the running kernel, decoded from
// the version code reported by vdsoVersion. Any error from vdsoVersion is
// passed through unchanged together with a zero Version.
func detectKernelVersion() (Version, error) {
	code, err := vdsoVersion()
	if err != nil {
		return Version{}, err
	}

	detected := NewVersionFromCode(code)
	return detected, nil
}
// KernelRelease returns the release string of the running kernel as reported
// by uname.
//
// Its format depends on the Linux distribution and corresponds to directory
// names in /lib/modules by convention. Some examples are 5.15.17-1-lts and
// 4.19.0-16-amd64.
func KernelRelease() (string, error) {
	uts := new(unix.Utsname)
	if err := unix.Uname(uts); err != nil {
		return "", fmt.Errorf("uname failed: %w", err)
	}

	release := unix.ByteSliceToString(uts.Release[:])
	return release, nil
}
@@ -0,0 +1,97 @@
package internal
import (
"os"
"testing"
)
// TestVersion checks parsing of version strings with and without a patch
// component, and the ordering implemented by Version.Less.
func TestVersion(t *testing.T) {
	a, err := NewVersion("1.2")
	if err != nil {
		t.Fatal(err)
	}

	b, err := NewVersion("2.2.1")
	if err != nil {
		t.Fatal(err)
	}

	if !a.Less(b) {
		t.Error("A should be less than B")
	}

	if b.Less(a) {
		t.Error("B shouldn't be less than A")
	}

	// A lower major version sorts first even if its minor version is higher.
	v200 := Version{2, 0, 0}
	if !a.Less(v200) {
		t.Error("1.2 should be less than 2.0.0")
	}

	if v200.Less(a) {
		t.Error("2.0.0 should not be less than 1.2")
	}
}
// TestKernelVersion checks the packing performed by Version.Kernel, including
// clamping of the patch level and truncation of major and minor.
func TestKernelVersion(t *testing.T) {
	// Kernels 4.4 and 4.9 have a SUBLEVEL of over 255 and clamp it to 255.
	// In our implementation, the other version segments are truncated.
	if v, want := (Version{256, 256, 256}), uint32(255); v.Kernel() != want {
		t.Errorf("256.256.256 should result in a kernel version of %d, got: %d", want, v.Kernel())
	}

	// Known good version.
	if v, want := (Version{4, 9, 128}), uint32(264576); v.Kernel() != want {
		t.Errorf("4.9.128 should result in a kernel version of %d, got: %d", want, v.Kernel())
	}
}
// TestCurrentKernelVersion checks that the running kernel's version can be
// detected. If the KERNEL_VERSION environment variable is set, the detected
// major and minor components must match it.
func TestCurrentKernelVersion(t *testing.T) {
	got, err := KernelVersion()
	if err != nil {
		t.Fatal(err)
	}

	fromEnv := os.Getenv("KERNEL_VERSION")
	if fromEnv == "" {
		return
	}

	want, err := NewVersion(fromEnv)
	if err != nil {
		t.Fatal(err)
	}

	if want[0] != got[0] || want[1] != got[1] {
		t.Errorf("expected kernel version %d.%d, got %d.%d", want[0], want[1], got[0], got[1])
	}
}
// TestVersionFromCode checks that version codes are unpacked into the
// expected major, minor and patch components.
func TestVersionFromCode(t *testing.T) {
	cases := []struct {
		name string
		code uint32
		want Version
	}{
		{name: "0.0.0", code: 0, want: Version{0, 0, 0}},
		{name: "1.0.0", code: 0x10000, want: Version{1, 0, 0}},
		{name: "4.4.255", code: 0x404ff, want: Version{4, 4, 255}},
		{name: "255.255.255", code: 0xffffff, want: Version{255, 255, 255}},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := NewVersionFromCode(tc.code); got != tc.want {
				t.Errorf("unexpected version for code '%d'. got: %v, want: %v", tc.code, got, tc.want)
			}
		})
	}
}
// TestKernelRelease checks that a non-empty release string is reported for
// the running kernel.
func TestKernelRelease(t *testing.T) {
	release, err := KernelRelease()
	switch {
	case err != nil:
		t.Fatal(err)
	case release == "":
		t.Fatal("unexpected empty kernel release")
	}
}