3

我正在玩 gobpf 并且在计算跟踪的用户空间函数的持续时间时遇到了问题。我使用 bpf_ktime_get_ns() 来读取时间,然后尝试计算增量,但是得到了巨大的数字,尽管跟踪的函数只休眠了 1 秒。

这是经过测试的 C 程序,它有一个名为“ameba”的函数。

#include <stdio.h>
#include <strings.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

char * ameba(char * s1, char * s2);

int main(void) {
    time_t rawtime;
    struct tm * timeinfo;

    time(&rawtime);
    timeinfo = localtime(&rawtime);
    printf("enter: %s", asctime (timeinfo));
    printf("%s\n", ameba("lessqqmorepewpew", "good luck, have fun"));

    time(&rawtime);
    timeinfo = localtime(&rawtime);
    printf("return: %s", asctime(timeinfo));
}

char * ameba(char * s1, char * s2) {
    char *s;
    s = (char *) malloc(128);
    sleep(1);
    snprintf(s, 128, "phrase1: %s, phrase2: %s", s1, s2);
    return s;
}

去代码

package main

import (
    "bytes"
    "encoding/binary"
    "fmt"
    "os"
    "os/signal"
    "time"

    bpf "github.com/iovisor/gobpf/bcc"
)

const source string = `
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
struct val_t {
    u32 pid;
    char comm[TASK_COMM_LEN];
    u64 ts;
};
struct data_t {
    u32 pid;
    char comm[TASK_COMM_LEN];
    u64 delta;    
};
BPF_HASH(start, u32, struct val_t);
BPF_PERF_OUTPUT(ameba_events);
int do_entry(struct pt_regs *ctx) {
    if (!PT_REGS_PARM1(ctx))
        return 0;
    struct val_t val = {};
    u32 pid = bpf_get_current_pid_tgid();
    if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
        val.pid = bpf_get_current_pid_tgid();
        val.ts = bpf_ktime_get_ns();
        start.update(&pid, &val);
    }
    return 0;
}
int do_return(struct pt_regs *ctx) {
    struct val_t *valp;
    struct data_t data = {};
    u64 delta;
    u32 pid = bpf_get_current_pid_tgid();
    u64 tsp = bpf_ktime_get_ns();
    valp = start.lookup(&pid);

    if (valp == 0)
        return 0;       // missed start

    bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm);
    data.pid = valp->pid;
    data.delta = tsp - valp->ts;
    ameba_events.perf_submit(ctx, &data, sizeof(data));
    start.delete(&pid);

    return 0;
}
`

type amebaEvent struct {
    Pid uint32
    Comm [16]byte
    Delta uint64
}

func main() {
    m := bpf.NewModule(source, []string{})
    defer m.Close()

    amebaUprobe, err := m.LoadUprobe("do_entry")
    if err != nil {
        fmt.Fprintf(os.Stderr, "Failed to load do_entry: %s\n", err)
        os.Exit(1)
    }
    amebaUretprobe, err := m.LoadUprobe("do_return")
    if err != nil {
        fmt.Fprintf(os.Stderr, "Failed to load do_return: %s\n", err)
        os.Exit(1)
    }

    err = m.AttachUprobe("/tmp/sandbox/ameba", "ameba", amebaUprobe, -1)
    if err != nil {
        fmt.Fprintf(os.Stderr, "Failed to attach do_entry uprobe: %s\n", err)
        os.Exit(1)
    }
    err = m.AttachUretprobe("/tmp/sandbox/ameba", "ameba", amebaUretprobe, -1)
    if err != nil {
        fmt.Fprintf(os.Stderr, "Failed to attach do_return uretprobe: %s\n", err)
        os.Exit(1)
    }

    table := bpf.NewTable(m.TableId("ameba_events"), m)

    channel := make(chan []byte)

    perfMap, err := bpf.InitPerfMap(table, channel)
    if err != nil {
        fmt.Fprintf(os.Stderr, "Failed to init perf map: %s\n", err)
        os.Exit(1)
    }

    sig := make(chan os.Signal, 1)
    signal.Notify(sig, os.Interrupt, os.Kill)

    fmt.Printf("%10s\t%s\t%s\t%s\n", "PID", "COMMAND", "DURATION", "RAW")
    go func() {
        var event amebaEvent
        for {
            data := <-channel
            err := binary.Read(bytes.NewBuffer(data), binary.LittleEndian, &event)
            if err != nil {
                fmt.Printf("failed to decode received data: %s\n", err)
                continue
            }
            // Convert C string (null-terminated) to Go string
            comm := string(event.Comm[:bytes.IndexByte(event.Comm[:], 0)])
            fmt.Printf("%10d\t%s\t%s\t%d\n", event.Pid, comm, time.Duration(event.Delta), event.Delta)
        }
    }()

    perfMap.Start()
    <-sig
    perfMap.Stop()
}

程序输出示例:

PID 命令持续时间 RAW
15515 ameba 1193179h21m9.457496929s 4295445669457496929 15550
ameba 1193198h27m37.653709676s 4295514457653709676

哪里错了??

4

1 回答 1

4

这看起来像是由内核和用户端之间的填充不匹配引起的。该data_t结构实际​​上在编译时填充以等效于以下内容:

struct data_t {
   u32 pid;
   char padding[4];
   char comm[TASK_COMM_LEN];
   u64 delta;    
};

如果您在 Go 端明确添加相同的填充,您的问题就会消失:

type amebaEvent struct {
    Pid uint32
    Pad [4]byte
    Comm [16]byte
    Delta uint64
}

产生:

PID COMMAND DURATION    RAW
8258    a   1.000179625s    1000179625
8260    a   1.000158337s    1000158337

正如您在评论中提到的,另一种解决方案是打包 C 结构以删除填充,使用__attribute__((packed)).


它与 bcc 上的这个问题密切相关——尽管不相同——:https ://github.com/iovisor/bcc/issues/2017 。

于 2018-11-15T21:55:40.553 回答