aboutsummaryrefslogtreecommitdiffstats
path: root/godo.go
diff options
context:
space:
mode:
authorWe-unite <3205135446@qq.com>2024-07-17 11:47:03 +0800
committerWe-unite <3205135446@qq.com>2024-07-17 14:03:06 +0800
commitf055b3940f999c2e26448812e67b68da363dcbad (patch)
tree145411eb93d96ecd4b5b24783d10da5e345791f7 /godo.go
downloadgodo-f055b3940f999c2e26448812e67b68da363dcbad.tar.gz
godo-f055b3940f999c2e26448812e67b68da363dcbad.zip
Initial commit
This repo is to supervise all processes in containers, in other words inspect behaviors of dockers, and get the pid tree. There are several ways for programs in user space to intereact with kernel space: - system calls, which can be found out in source path arch/x86/syscalls - ioctl - /proc virtual file system, to read kernel realtime info - nerlink socket the pid we should pay attention to is /usr/bin/containerd, which may come from service docker-daemon and ppid is 1. Each time a docker is start or stop, this forks a pid, the pid then forks, that's the main process of the docker. To grub the info of pid create or exit, this program is based on go-libauditd, which uses netlink socket to hear from kernel about audit log. What's worrying is that one event is always devided into several entries, and several events may be received alternately. So, from my point of view, which program has 3 coroutines and 2 channels. the first receives raw event message from audit, then throw it to channel 1; the second listen to channel 1, and organizes each event until there's a EOE, then throw to channel 2; the third discover event from channel 2, deal with th event, such as create or delete pid. Specially, since two relative infomation(pid 1 fork pid2, then pid 1 exits)may comes out of order, deletion mast be delayed for some time(may 1 second), to keep the process tree correct.
Diffstat (limited to '')
-rw-r--r--godo.go213
1 files changed, 213 insertions, 0 deletions
diff --git a/godo.go b/godo.go
new file mode 100644
index 0000000..6b6f48f
--- /dev/null
+++ b/godo.go
@@ -0,0 +1,213 @@
1package main
2
3import (
4 "bufio"
5 "flag"
6 "fmt"
7 "io"
8 "log"
9 "os"
10 "os/exec"
11 "path/filepath"
12 "strconv"
13 "strings"
14 "sync"
15
16 "github.com/elastic/go-libaudit/v2"
17 "github.com/elastic/go-libaudit/v2/auparse"
18)
19
20var (
21 fs = flag.NewFlagSet("audit", flag.ExitOnError)
22 diag = fs.String("diag", "", "dump raw information from kernel to file")
23 rate = fs.Uint("rate", 0, "rate limit in kernel (default 0, no rate limit)")
24 backlog = fs.Uint("backlog", 8192, "backlog limit")
25 immutable = fs.Bool("immutable", false, "make kernel audit settings immutable (requires reboot to undo)")
26 receiveOnly = fs.Bool("ro", false, "receive only using multicast, requires kernel 3.16+")
27)
28
29type process struct {
30 cmdline string
31 rootfs string
32 children []int
33}
34
35var pids map[int]*process //古希腊掌管进程的神
36var containers map[string]int // 古希腊掌管容器的神
37var wg sync.WaitGroup // 掌管协程
38
39func main() {
40 // 检查用户身份,并添加auditd规则,监听所有syscall
41 if os.Geteuid() != 0 {
42 fmt.Printf("Err: Please run me as root, %d!\n", os.Getegid())
43 return
44 }
45 syscall := [5]string{"fork", "vfork", "execve", "exit", "exit_group"}
46 var auditCmd *exec.Cmd
47 auditCmd = exec.Command("auditctl", "-D") // 清空所有规则
48 auditCmd.Run()
49 // 设置监听规则
50 for i := 0; i < 5; i++ {
51 auditCmd = exec.Command("auditctl", "-a", "exit,always", "-F", "arch=b64", "-S", syscall[i])
52 auditCmd.Run()
53 }
54
55 // 查找pid
56 containerdPid, err := getPid()
57 if err != nil {
58 fmt.Printf("Error finding containerd: %v\n", err)
59 return
60 }
61
62 pids = make(map[int]*process)
63 containers = make(map[string]int)
64 pids[containerdPid] = &process{cmdline: "/usr/bin/cmdline", rootfs: "/", children: make([]int, 0)}
65
66 // 开始运行,解析命令行参数后监听
67 if err := fs.Parse(os.Args[1:]); err != nil {
68 log.Fatal(err)
69 }
70
71 if err := read(); err != nil {
72 log.Fatalf("error: %v", err)
73 }
74}
75
76func getPid() (int, error) {
77 // 指定要搜索的关键词
78 keyword := "/usr/bin/containerd"
79
80 // 获取/proc目录下的所有子目录
81 procDir, err := filepath.Glob("/proc/*")
82 if err != nil {
83 return 0, err
84 }
85
86 // 遍历子目录,查找包含关键词的进程
87 for _, dir := range procDir {
88 pid, err := strconv.Atoi(filepath.Base(dir))
89 if err != nil {
90 continue // 跳过非PID的目录
91 }
92
93 // 检查进程是否包含关键词
94 if containsKeyword(pid, keyword) {
95 return pid, nil
96 }
97 }
98 err = fmt.Errorf("Error: no containerd process found.\n")
99 return 0, err
100}
101
102func containsKeyword(pid int, keyword string) bool {
103 // 构造完整的进程命令路径
104 cmdPath := fmt.Sprintf("/proc/%d/cmdline", pid)
105
106 // 打开文件
107 file, err := os.Open(cmdPath)
108 if err != nil {
109 return false
110 }
111 defer file.Close()
112
113 // 读取文件内容
114 scanner := bufio.NewScanner(file)
115 scanner.Split(bufio.ScanLines)
116 for scanner.Scan() {
117 line := scanner.Text()
118 if strings.Contains(line, keyword) {
119 return true
120 }
121 }
122 return false
123}
124
125func read() error {
126 // Write netlink response to a file for further analysis or for writing
127 // tests cases.
128 var diagWriter io.Writer
129 if *diag != "" {
130 f, err := os.OpenFile(*diag, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0o600)
131 if err != nil {
132 return err
133 }
134 defer f.Close()
135 diagWriter = f
136 }
137
138 log.Println("starting netlink client")
139
140 var err error
141 var client *libaudit.AuditClient
142 if *receiveOnly {
143 client, err = libaudit.NewMulticastAuditClient(diagWriter)
144 if err != nil {
145 return fmt.Errorf("failed to create receive-only audit client: %w", err)
146 }
147 defer client.Close()
148 } else {
149 client, err = libaudit.NewAuditClient(diagWriter)
150 if err != nil {
151 return fmt.Errorf("failed to create audit client: %w", err)
152 }
153 defer client.Close()
154
155 status, err := client.GetStatus()
156 if err != nil {
157 return fmt.Errorf("failed to get audit status: %w", err)
158 }
159 log.Printf("received audit status=%+v", status)
160
161 if status.Enabled == 0 {
162 log.Println("enabling auditing in the kernel")
163 if err = client.SetEnabled(true, libaudit.WaitForReply); err != nil {
164 return fmt.Errorf("failed to set enabled=true: %w", err)
165 }
166 }
167
168 if status.RateLimit != uint32(*rate) {
169 log.Printf("setting rate limit in kernel to %v", *rate)
170 if err = client.SetRateLimit(uint32(*rate), libaudit.NoWait); err != nil {
171 return fmt.Errorf("failed to set rate limit to unlimited: %w", err)
172 }
173 }
174
175 if status.BacklogLimit != uint32(*backlog) {
176 log.Printf("setting backlog limit in kernel to %v", *backlog)
177 if err = client.SetBacklogLimit(uint32(*backlog), libaudit.NoWait); err != nil {
178 return fmt.Errorf("failed to set backlog limit: %w", err)
179 }
180 }
181
182 if status.Enabled != 2 && *immutable {
183 log.Printf("setting kernel settings as immutable")
184 if err = client.SetImmutable(libaudit.NoWait); err != nil {
185 return fmt.Errorf("failed to set kernel as immutable: %w", err)
186 }
187 }
188
189 log.Printf("sending message to kernel registering our PID (%v) as the audit daemon", os.Getpid())
190 if err = client.SetPID(libaudit.NoWait); err != nil {
191 return fmt.Errorf("failed to set audit PID: %w", err)
192 }
193 }
194
195 return receive(client)
196}
197
198func receive(r *libaudit.AuditClient) error {
199 for {
200 rawEvent, err := r.Receive(false)
201 if err != nil {
202 return fmt.Errorf("receive failed: %w", err)
203 }
204
205 // Messages from 1300-2999 are valid audit messages.
206 if rawEvent.Type < auparse.AUDIT_USER_AUTH ||
207 rawEvent.Type > auparse.AUDIT_LAST_USER_MSG2 {
208 continue
209 }
210
211 fmt.Printf("type=%v msg=%s\n", rawEvent.Type, rawEvent.Data)
212 }
213}