diff options
author | We-unite <3205135446@qq.com> | 2024-07-17 11:47:03 +0800 |
---|---|---|
committer | We-unite <3205135446@qq.com> | 2024-07-17 14:03:06 +0800 |
commit | f055b3940f999c2e26448812e67b68da363dcbad (patch) | |
tree | 145411eb93d96ecd4b5b24783d10da5e345791f7 /godo.go | |
download | godo-f055b3940f999c2e26448812e67b68da363dcbad.tar.gz godo-f055b3940f999c2e26448812e67b68da363dcbad.zip |
Initial commit
This repo is to supervise all processes in containers, in other
words inspect behaviors of dockers, and get the pid tree.
There are several ways for programs in user space to intereact with
kernel space:
- system calls, which can be found out in source path arch/x86/syscalls
- ioctl
- /proc virtual file system, to read kernel realtime info
- nerlink socket
the pid we should pay attention to is /usr/bin/containerd, which may
come from service docker-daemon and ppid is 1. Each time a docker is
start or stop, this forks a pid, the pid then forks, that's the main
process of the docker.
To grub the info of pid create or exit, this program is based on
go-libauditd, which uses netlink socket to hear from kernel about
audit log. What's worrying is that one event is always devided into
several entries, and several events may be received alternately.
So, from my point of view, which program has 3 coroutines and 2
channels. the first receives raw event message from audit, then
throw it to channel 1; the second listen to channel 1, and organizes
each event until there's a EOE, then throw to channel 2; the third
discover event from channel 2, deal with th event, such as create or
delete pid. Specially, since two relative infomation(pid 1 fork pid2,
then pid 1 exits)may comes out of order, deletion mast be delayed for
some time(may 1 second), to keep the process tree correct.
Diffstat (limited to 'godo.go')
-rw-r--r-- | godo.go | 213 |
1 files changed, 213 insertions, 0 deletions
@@ -0,0 +1,213 @@ | |||
1 | package main | ||
2 | |||
3 | import ( | ||
4 | "bufio" | ||
5 | "flag" | ||
6 | "fmt" | ||
7 | "io" | ||
8 | "log" | ||
9 | "os" | ||
10 | "os/exec" | ||
11 | "path/filepath" | ||
12 | "strconv" | ||
13 | "strings" | ||
14 | "sync" | ||
15 | |||
16 | "github.com/elastic/go-libaudit/v2" | ||
17 | "github.com/elastic/go-libaudit/v2/auparse" | ||
18 | ) | ||
19 | |||
20 | var ( | ||
21 | fs = flag.NewFlagSet("audit", flag.ExitOnError) | ||
22 | diag = fs.String("diag", "", "dump raw information from kernel to file") | ||
23 | rate = fs.Uint("rate", 0, "rate limit in kernel (default 0, no rate limit)") | ||
24 | backlog = fs.Uint("backlog", 8192, "backlog limit") | ||
25 | immutable = fs.Bool("immutable", false, "make kernel audit settings immutable (requires reboot to undo)") | ||
26 | receiveOnly = fs.Bool("ro", false, "receive only using multicast, requires kernel 3.16+") | ||
27 | ) | ||
28 | |||
29 | type process struct { | ||
30 | cmdline string | ||
31 | rootfs string | ||
32 | children []int | ||
33 | } | ||
34 | |||
35 | var pids map[int]*process //古希腊掌管进程的神 | ||
36 | var containers map[string]int // 古希腊掌管容器的神 | ||
37 | var wg sync.WaitGroup // 掌管协程 | ||
38 | |||
39 | func main() { | ||
40 | // 检查用户身份,并添加auditd规则,监听所有syscall | ||
41 | if os.Geteuid() != 0 { | ||
42 | fmt.Printf("Err: Please run me as root, %d!\n", os.Getegid()) | ||
43 | return | ||
44 | } | ||
45 | syscall := [5]string{"fork", "vfork", "execve", "exit", "exit_group"} | ||
46 | var auditCmd *exec.Cmd | ||
47 | auditCmd = exec.Command("auditctl", "-D") // 清空所有规则 | ||
48 | auditCmd.Run() | ||
49 | // 设置监听规则 | ||
50 | for i := 0; i < 5; i++ { | ||
51 | auditCmd = exec.Command("auditctl", "-a", "exit,always", "-F", "arch=b64", "-S", syscall[i]) | ||
52 | auditCmd.Run() | ||
53 | } | ||
54 | |||
55 | // 查找pid | ||
56 | containerdPid, err := getPid() | ||
57 | if err != nil { | ||
58 | fmt.Printf("Error finding containerd: %v\n", err) | ||
59 | return | ||
60 | } | ||
61 | |||
62 | pids = make(map[int]*process) | ||
63 | containers = make(map[string]int) | ||
64 | pids[containerdPid] = &process{cmdline: "/usr/bin/cmdline", rootfs: "/", children: make([]int, 0)} | ||
65 | |||
66 | // 开始运行,解析命令行参数后监听 | ||
67 | if err := fs.Parse(os.Args[1:]); err != nil { | ||
68 | log.Fatal(err) | ||
69 | } | ||
70 | |||
71 | if err := read(); err != nil { | ||
72 | log.Fatalf("error: %v", err) | ||
73 | } | ||
74 | } | ||
75 | |||
76 | func getPid() (int, error) { | ||
77 | // 指定要搜索的关键词 | ||
78 | keyword := "/usr/bin/containerd" | ||
79 | |||
80 | // 获取/proc目录下的所有子目录 | ||
81 | procDir, err := filepath.Glob("/proc/*") | ||
82 | if err != nil { | ||
83 | return 0, err | ||
84 | } | ||
85 | |||
86 | // 遍历子目录,查找包含关键词的进程 | ||
87 | for _, dir := range procDir { | ||
88 | pid, err := strconv.Atoi(filepath.Base(dir)) | ||
89 | if err != nil { | ||
90 | continue // 跳过非PID的目录 | ||
91 | } | ||
92 | |||
93 | // 检查进程是否包含关键词 | ||
94 | if containsKeyword(pid, keyword) { | ||
95 | return pid, nil | ||
96 | } | ||
97 | } | ||
98 | err = fmt.Errorf("Error: no containerd process found.\n") | ||
99 | return 0, err | ||
100 | } | ||
101 | |||
102 | func containsKeyword(pid int, keyword string) bool { | ||
103 | // 构造完整的进程命令路径 | ||
104 | cmdPath := fmt.Sprintf("/proc/%d/cmdline", pid) | ||
105 | |||
106 | // 打开文件 | ||
107 | file, err := os.Open(cmdPath) | ||
108 | if err != nil { | ||
109 | return false | ||
110 | } | ||
111 | defer file.Close() | ||
112 | |||
113 | // 读取文件内容 | ||
114 | scanner := bufio.NewScanner(file) | ||
115 | scanner.Split(bufio.ScanLines) | ||
116 | for scanner.Scan() { | ||
117 | line := scanner.Text() | ||
118 | if strings.Contains(line, keyword) { | ||
119 | return true | ||
120 | } | ||
121 | } | ||
122 | return false | ||
123 | } | ||
124 | |||
125 | func read() error { | ||
126 | // Write netlink response to a file for further analysis or for writing | ||
127 | // tests cases. | ||
128 | var diagWriter io.Writer | ||
129 | if *diag != "" { | ||
130 | f, err := os.OpenFile(*diag, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0o600) | ||
131 | if err != nil { | ||
132 | return err | ||
133 | } | ||
134 | defer f.Close() | ||
135 | diagWriter = f | ||
136 | } | ||
137 | |||
138 | log.Println("starting netlink client") | ||
139 | |||
140 | var err error | ||
141 | var client *libaudit.AuditClient | ||
142 | if *receiveOnly { | ||
143 | client, err = libaudit.NewMulticastAuditClient(diagWriter) | ||
144 | if err != nil { | ||
145 | return fmt.Errorf("failed to create receive-only audit client: %w", err) | ||
146 | } | ||
147 | defer client.Close() | ||
148 | } else { | ||
149 | client, err = libaudit.NewAuditClient(diagWriter) | ||
150 | if err != nil { | ||
151 | return fmt.Errorf("failed to create audit client: %w", err) | ||
152 | } | ||
153 | defer client.Close() | ||
154 | |||
155 | status, err := client.GetStatus() | ||
156 | if err != nil { | ||
157 | return fmt.Errorf("failed to get audit status: %w", err) | ||
158 | } | ||
159 | log.Printf("received audit status=%+v", status) | ||
160 | |||
161 | if status.Enabled == 0 { | ||
162 | log.Println("enabling auditing in the kernel") | ||
163 | if err = client.SetEnabled(true, libaudit.WaitForReply); err != nil { | ||
164 | return fmt.Errorf("failed to set enabled=true: %w", err) | ||
165 | } | ||
166 | } | ||
167 | |||
168 | if status.RateLimit != uint32(*rate) { | ||
169 | log.Printf("setting rate limit in kernel to %v", *rate) | ||
170 | if err = client.SetRateLimit(uint32(*rate), libaudit.NoWait); err != nil { | ||
171 | return fmt.Errorf("failed to set rate limit to unlimited: %w", err) | ||
172 | } | ||
173 | } | ||
174 | |||
175 | if status.BacklogLimit != uint32(*backlog) { | ||
176 | log.Printf("setting backlog limit in kernel to %v", *backlog) | ||
177 | if err = client.SetBacklogLimit(uint32(*backlog), libaudit.NoWait); err != nil { | ||
178 | return fmt.Errorf("failed to set backlog limit: %w", err) | ||
179 | } | ||
180 | } | ||
181 | |||
182 | if status.Enabled != 2 && *immutable { | ||
183 | log.Printf("setting kernel settings as immutable") | ||
184 | if err = client.SetImmutable(libaudit.NoWait); err != nil { | ||
185 | return fmt.Errorf("failed to set kernel as immutable: %w", err) | ||
186 | } | ||
187 | } | ||
188 | |||
189 | log.Printf("sending message to kernel registering our PID (%v) as the audit daemon", os.Getpid()) | ||
190 | if err = client.SetPID(libaudit.NoWait); err != nil { | ||
191 | return fmt.Errorf("failed to set audit PID: %w", err) | ||
192 | } | ||
193 | } | ||
194 | |||
195 | return receive(client) | ||
196 | } | ||
197 | |||
198 | func receive(r *libaudit.AuditClient) error { | ||
199 | for { | ||
200 | rawEvent, err := r.Receive(false) | ||
201 | if err != nil { | ||
202 | return fmt.Errorf("receive failed: %w", err) | ||
203 | } | ||
204 | |||
205 | // Messages from 1300-2999 are valid audit messages. | ||
206 | if rawEvent.Type < auparse.AUDIT_USER_AUTH || | ||
207 | rawEvent.Type > auparse.AUDIT_LAST_USER_MSG2 { | ||
208 | continue | ||
209 | } | ||
210 | |||
211 | fmt.Printf("type=%v msg=%s\n", rawEvent.Type, rawEvent.Data) | ||
212 | } | ||
213 | } | ||