From 58b1af7139bd2c4a2682382261fcc545b86d8685 Mon Sep 17 00:00:00 2001
From: We-unite <3205135446@qq.com>
Date: Thu, 18 Jul 2024 14:28:52 +0800
Subject: Mainly finish the second coroutine, organize event

As planned, the first coroutine throws raw event information to the second, which organizes all info for the same event according to its event id; the id is unique as long as this computer is not shut down.

There are several difficulties I've encountered, so I list their solutions here to remember:

- Raw info from the 1st coroutine is correct, but wrong when the 2nd gets it; or it's correct when received, then a regular expr goes to match it, the first match is in line with expectations, but the next match goes totally wrong, and the info is different from what was received.
  Looking into the src of go-libaudit, we find that when reading from the netlink socket, the read buffer is always the same slice: it first receives a long message, then **passes the original slice to rawEvent.Data**, and then receives a shorter message. rawEvent.Data is passed to the 2nd coroutine as **a pointer to rawEvent**, which means all these 3 processes use the same part of memory. Then, when a shorter message comes from the socket, the slice isn't moved; instead it writes again to this part of memory, so coroutine 2 gets dirty data.
  To deal with it, we change the type of the channel from pointer to interface, and make a deep copy of rawEvent before passing it down. As a result, the 2nd coroutine gets a copy of the message rather than the original, and it finally comes out right.
- While designing a regular expr, it's thought to be correct but fails to match the right string. There may be sth wrong that can't be discovered by the human eye; you can try to rewrite the expr, then it may be fixed.

Also, there are some hidden dangers:

- The 2nd coroutine comes with no error checks, although the err variable is set and caught under the rules of the compiler. We **shall** add them later.
- Is it reasonable to pass cooked event info immediately to 3rd coroutine without waiting some time? Info from network is out of order after all. Fight! Fight! Fight! --- .gitignore | 3 +- go.mod | 1 + go.sum | 2 + godo.go | 261 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 262 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 6cff91d..8bbe3e4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .vscode/* -godo \ No newline at end of file +godo + diff --git a/go.mod b/go.mod index c73fc4f..2969b32 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.21.5 require ( github.com/elastic/go-libaudit/v2 v2.5.0 + github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/go.sum b/go.sum index 6880c39..7ce498a 100644 --- a/go.sum +++ b/go.sum @@ -7,6 +7,8 @@ github.com/elastic/go-licenser v0.4.1 h1:1xDURsc8pL5zYT9R29425J3vkHdt4RT5TNEMeRN github.com/elastic/go-licenser v0.4.1/go.mod h1:V56wHMpmdURfibNBggaSBfqgPxyT1Tldns1i87iTEvU= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= diff --git a/godo.go b/godo.go index 6b6f48f..2ba97d1 100644 --- a/godo.go +++ b/godo.go @@ -9,12 +9,15 @@ import ( "os" "os/exec" "path/filepath" + "regexp" "strconv" "strings" "sync" + "time" "github.com/elastic/go-libaudit/v2" 
"github.com/elastic/go-libaudit/v2/auparse" + "github.com/mohae/deepcopy" ) var ( @@ -26,8 +29,19 @@ var ( receiveOnly = fs.Bool("ro", false, "receive only using multicast, requires kernel 3.16+") ) +type Event struct { + timestamp time.Time + pid, ppid int + syscall int + argc int + args []string + cwd string +} + type process struct { cmdline string + argv []string + cwd string rootfs string children []int } @@ -35,6 +49,10 @@ type process struct { var pids map[int]*process //古希腊掌管进程的神 var containers map[string]int // 古希腊掌管容器的神 var wg sync.WaitGroup // 掌管协程 +var rawChan chan interface{} // 从接收到整理的管道,这里不是原始数据类型,下文解释 +var cookedChan chan Event // 整理好的信息的管道 + +var syscallTable [500]string //记录一下系统调用 func main() { // 检查用户身份,并添加auditd规则,监听所有syscall @@ -42,7 +60,14 @@ func main() { fmt.Printf("Err: Please run me as root, %d!\n", os.Getegid()) return } - syscall := [5]string{"fork", "vfork", "execve", "exit", "exit_group"} + + // 所有的系统调用号与名称的关系 + err := figureOutSyscalls() + if err != nil { + fmt.Printf("Error figuring out syscall numbers: %v\n", err) + } + + syscall := [6]string{"fork", "vfork", "clone", "execve", "exit", "exit_group"} var auditCmd *exec.Cmd auditCmd = exec.Command("auditctl", "-D") // 清空所有规则 auditCmd.Run() @@ -58,9 +83,14 @@ func main() { fmt.Printf("Error finding containerd: %v\n", err) return } - + // 数据结构初始化 pids = make(map[int]*process) containers = make(map[string]int) + + // 创世之神,1号进程 + pids[1] = &process{rootfs: "/", children: make([]int, 0)} + pids[1].children = append(pids[1].children, containerdPid) + // /usr/bin/containerd,也就是我们最关注的进程 pids[containerdPid] = &process{cmdline: "/usr/bin/cmdline", rootfs: "/", children: make([]int, 0)} // 开始运行,解析命令行参数后监听 @@ -73,6 +103,29 @@ func main() { } } +func figureOutSyscalls() error { + NRRegex := regexp.MustCompile(`#define __NR_(.*?) 
(\d+)$`) + file, err := os.Open("/usr/include/asm/unistd_64.h") + if err != nil { + return err + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if NRRegex.MatchString(line) { + match := NRRegex.FindStringSubmatch(line) + num, err := strconv.Atoi(match[2]) + if err != nil { + return err + } + syscallTable[num] = match[1] + } + } + return nil +} + func getPid() (int, error) { // 指定要搜索的关键词 keyword := "/usr/bin/containerd" @@ -122,6 +175,37 @@ func containsKeyword(pid int, keyword string) bool { return false } +func getTimeFromStr(timeStr string) (time.Time, error) { + timestampFloat, err := strconv.ParseFloat(timeStr, 64) + if err != nil { + return time.Unix(0, 0), err + } + secs := int64(timestampFloat) + nsecs := int64((timestampFloat - float64(secs)) * 1e9) + + // 只精确到毫秒就够了 + t := time.Unix(secs, nsecs).Truncate(time.Millisecond) + return t, nil +} + +func hexToAscii(hexString string) string { + bytes := []byte{} + for i := 0; i < len(hexString); i += 2 { + hexPair := hexString[i : i+2] + // 将十六进制数转换为十进制数 + decimal, err := strconv.ParseInt(hexPair, 16, 8) + if err != nil { + return "Invalid hex string" + } + char := byte(decimal) + bytes = append(bytes, char) + } + + asciiString := strings.ReplaceAll(string(bytes), "\000", " ") + + return asciiString +} + func read() error { // Write netlink response to a file for further analysis or for writing // tests cases. 
@@ -192,10 +276,25 @@ func read() error { } } - return receive(client) + // 各协程至此开始 + // return receive(client) + rawChan = make(chan interface{}) + cookedChan = make(chan Event) + wg.Add(1) + go receive(client) + wg.Add(1) + go orgnaze() + wg.Add(1) + go deal() + + wg.Wait() + time.Sleep(2 * time.Second) + return nil } func receive(r *libaudit.AuditClient) error { + defer wg.Done() + defer close(rawChan) for { rawEvent, err := r.Receive(false) if err != nil { @@ -208,6 +307,160 @@ func receive(r *libaudit.AuditClient) error { continue } - fmt.Printf("type=%v msg=%s\n", rawEvent.Type, rawEvent.Data) + rawEventMessage := deepcopy.Copy(*rawEvent) + rawChan <- rawEventMessage + } +} + +func orgnaze() { + defer wg.Done() + defer close(cookedChan) + // 接收信息 + var raw interface{} + var ok bool + var rawEvent libaudit.RawAuditMessage + // 事件信息 + var eventId, argc int + var err [6]error + var event, cooked Event + // 为每个事务id存储其信息,事务id在操作系统运行期间是唯一的 + eventTable := make(map[int]*Event) + // 要用的正则匹配列表 + syscallRegex := regexp.MustCompile(`audit\((\d+\.\d+):(\d+)\).*?syscall=(\d+).*?ppid=(\d+) pid=(\d+).*?$`) + execveRegex := regexp.MustCompile(`audit\(\d+\.\d+:(\d+)\): argc=(\d+)`) + argsRegex := regexp.MustCompile(`a\d+=("(.*?)"|([0-9a-fA-F]+))`) + cwdRegex := regexp.MustCompile(`audit\(\d+\.\d+:(\d+)\): cwd="(.*?)"`) + proctitleRegex := regexp.MustCompile(`audit\(\d+\.\d+:(\d+)\): proctitle=("(.*?)"|([0-9a-fA-F]+))$`) + eoeRegex := regexp.MustCompile(`audit\(\d+\.\d+:(\d+)\)`) + for { + raw, ok = <-rawChan + if !ok { + break + } + rawEvent = raw.(libaudit.RawAuditMessage) + + // type Event struct { + // timestamp time.Time + // pid, ppid int + // syscall int + // argc int + // args []string + // cwd string + // } + switch rawEvent.Type { + case auparse.AUDIT_SYSCALL: + if syscallRegex.Match(rawEvent.Data) { + match := syscallRegex.FindSubmatch(rawEvent.Data) + event.timestamp, err[0] = getTimeFromStr(string(match[1])) + eventId, err[1] = strconv.Atoi(string(match[2])) + 
event.syscall, err[2] = strconv.Atoi(string(match[3])) + event.ppid, err[3] = strconv.Atoi(string(match[4])) + event.pid, err[4] = strconv.Atoi(string(match[5])) + eventTable[eventId] = &Event{ + timestamp: event.timestamp, + syscall: event.syscall, + ppid: event.ppid, + pid: event.pid, + argc: 0, + args: make([]string, 0), + cwd: "", + } + } + case auparse.AUDIT_EXECVE: + if execveRegex.Match(rawEvent.Data) { + match := execveRegex.FindSubmatch(rawEvent.Data) + eventId, err[0] = strconv.Atoi(string(match[1])) + argc, err[1] = strconv.Atoi(string(match[2])) + if err[0] == nil && err[1] == nil && argsRegex.Match(rawEvent.Data) { + match := argsRegex.FindAllSubmatch(rawEvent.Data, -1) + for i := 0; i < argc; i++ { + if len(match[i][2]) == 0 { + // 代表着匹配到的是十六进制数 + str := hexToAscii(string(match[i][3])) + eventTable[eventId].args = append(eventTable[eventId].args, str) + fmt.Printf("Origin: \"%s\", Res: \"%s\"\n", match[i][3], str) + } else { + eventTable[eventId].args = append(eventTable[eventId].args, string(match[i][2])) + } + } + eventTable[eventId].argc = argc + } + } + // case auparse.AUDIT_PATH: + case auparse.AUDIT_CWD: + if cwdRegex.Match(rawEvent.Data) { + match := cwdRegex.FindSubmatch(rawEvent.Data) + eventId, err[0] = strconv.Atoi(string(match[1])) + eventTable[eventId].cwd = string(match[2]) + } + case auparse.AUDIT_PROCTITLE: + if proctitleRegex.Match(rawEvent.Data) { + var cmdline string + var pEvent *Event + match := proctitleRegex.FindSubmatch(rawEvent.Data) + eventId, err[0] = strconv.Atoi(string(match[1])) + pEvent = eventTable[eventId] + if pEvent.argc == 0 { + // 只有等于0,才证明没经过EXECVE提取参数,才允许使用PROCTITLE提取参数 + if match[3] == nil { + // PROCTITLE写的是十六进制,转换为字符串 + cmdline = hexToAscii(string(match[4])) + } else { + cmdline = string(match[3]) + } + pEvent.args = strings.Split(cmdline, " ") + pEvent.argc = len(eventTable[eventId].args) + } + // 当读到proctitle的时候,而且是个新进程最好检查一下cwd,如果还为空,找proc + if pEvent.cwd == "" && (pEvent.syscall == 57 || pEvent.syscall == 
58 || pEvent.syscall == 59) { + cwdFilePath := fmt.Sprintf("/proc/%d/cwd", pEvent.pid) + pEvent.cwd, err[1] = os.Readlink(cwdFilePath) + if err[1] != nil { + pEvent.cwd = "" + break + } + } + } + case auparse.AUDIT_EOE: + if eoeRegex.Match(rawEvent.Data) { + match := eoeRegex.FindSubmatch(rawEvent.Data) + eventId, err[0] = strconv.Atoi(string(match[1])) + // TODO: 事件整理完毕,即刻发出,是否合理呢? + cooked = *eventTable[eventId] // 应当采用深拷贝吗?有待实验 + cookedChan <- cooked + delete(eventTable, eventId) //发出之后就从信息表扔掉,死人别占地 + } + default: + // TODO: 这里也需要做防护 + } + } +} + +func deal() { + defer wg.Done() + var cooked Event + var ok bool + for { + cooked, ok = <-cookedChan + if !ok { + break + } + // type Event struct { + // timestamp time.Time + // pid, ppid int + // syscall int + // argc int + // args []string + // cwd string + // } + fmt.Printf("recv: %v syscall=%d, ppid=%d, pid=%d, cwd=\"%s\", argc=%d, ", cooked.timestamp, cooked.syscall, cooked.ppid, cooked.pid, cooked.cwd, cooked.argc) + if len(cooked.args) != cooked.argc { + fmt.Printf("Fuck!\n") + continue + } + for i := 0; i < cooked.argc; i++ { + fmt.Printf("arg[%d]=\"%s\", ", i, cooked.args[i]) + } + fmt.Printf("\n") } } -- cgit v1.2.3-70-g09d2