From fc61a4a525846fa31ee2288df4e82f745bb39c95 Mon Sep 17 00:00:00 2001 From: We-unite <3205135446@qq.com> Date: Tue, 23 Jul 2024 19:32:09 +0800 Subject: Try to fix the out-of-order bug, add EXECVE to it The most important work during this time was finding a solution to the out-of-order bug. To describe it in detail: messages from audit may arrive out of order, which means a fork may come after the execve, or even after the exit. What an absurd phenomenon, to see a process that has not yet been created already working or exiting! To deal with this problem, I've tried several ways: - In the 2nd coroutine, when an EOE msg comes, if it's a fork/clone event, send it immediately; otherwise wait for some time (such as 100 ms). But in the end this only makes the delay longer, and it has other problems. - The 2nd coroutine doesn't send directly, but records all the finished event ids in a slice, and another thread checks once every second; if there is something in the slice, it sends the corresponding events in order of event id. But: the event that happens first doesn't always have the lower id or timestamp. For example, 1 forks 2, then 2 calls execve; audit in the kernel itself may get the execve before the fork (maybe fork makes other settings), which means the execve has an earlier timestamp and a lower event id. The out-of-order problem is not completely resolved. If we then add delays to non-clone events, a more serious problem happens: we must use a mutex to lock the slice recording finished event ids to prevent clashes between the send thread and the wait thread, but the wait thread can't get the mutex again, because there are too many clone events and sends are too frequent! - So I use no delay but mongodb: when an execve comes, if the pid is not recorded, just insert it and wait for the fork. It does work, but some other work is still left to do: - What should I do if "2 forks 3" comes before "1 forks 2"? For now I assume it doesn't happen, but what if? - When the execve comes before the fork, I record it; but if this process turns out to have a parent I don't care about, should the record be deleted, or stay there? 
Also, as mentioned above, I've added an EXECVE field to the process document in the db, which records all the execve calls (time and args) from the same process. Besides, exit_timestamp and exit_code can be caught now, but too many processes have no exit info. This is also to be fixed. Next, let's watch the files changed by each process. Don't forget the to-do items listed above! --- src/deal.go | 173 +++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 102 insertions(+), 71 deletions(-) (limited to 'src/deal.go') diff --git a/src/deal.go b/src/deal.go index 118d914..783dab8 100644 --- a/src/deal.go +++ b/src/deal.go @@ -1,51 +1,48 @@ package main import ( - "context" "fmt" + "sync" "time" "go.mongodb.org/mongo-driver/bson" - "go.mongodb.org/mongo-driver/mongo" - "go.mongodb.org/mongo-driver/mongo/options" ) const ( - dbName string = "test" - colName string = "pids" + dbName string = "test" + pidColName string = "pids" ) +var mongoMutex sync.Mutex +var pidCol mongoClient + func deal() { defer wg.Done() var cooked Event var ok bool var err error - var mongo *mongo.Client var res []bson.M - mongo, err = connect() - if err != nil { - fmt.Printf("Err connecting the mongodb: %v\n", err) + if err = pidCol.Connect(dbName, pidColName); err != nil { + fmt.Printf("Error connecting the mongodb: %v\n", err) } - pidCol := mongo.Database(dbName).Collection(colName) - - err = pidCol.Drop(context.Background()) - if err != nil { - fmt.Printf("Err drop: %v\n", err) + if err = pidCol.Drop(); err != nil { + fmt.Printf("Error drop the mongodb: %v\n", err) } - _, err = pidCol.InsertOne(context.Background(), bson.M{ - "ppid": 1, - "pid": containerdPid, - "cwd": "/", + err = pidCol.InsertOne(bson.M{ + "ppid": 1, + "pid": containerdPid, + "cwd": "/", + "children": bson.M{}, }) if err != nil { fmt.Printf("Err containerd: %v", err) return } - fmt.Printf("Containerd: %d\n", containerdPid) + defer pidCol.Disconnect() for { cooked, ok = <-cookedChan @@ -54,81 +51,115 @@ func deal() { } switch 
syscallTable[cooked.syscall] { - case "fork", "vfork", "clone": + case "clone": // 有无父进程在观察中 - res, err = findDocuments(mongo, "test", "pids", bson.M{"pid": cooked.ppid}) + res, err = pidCol.Finddoc(bson.M{"pid": cooked.ppid}) if err != nil || len(res) != 1 { break } // 自身是否已经记录 - res, err = findDocuments(mongo, "test", "pids", bson.M{"pid": cooked.pid}) + res, err = pidCol.Finddoc(bson.M{"pid": cooked.pid}) if err != nil { fmt.Printf("Err finding: %v\n", err) break - } else if len(res) != 0 { - fmt.Printf("Err inserting pid %v: already in db: %v\n", cooked.pid, res) - break - } - - doc := []bson.A{} - for _, str := range cooked.argv { - doc = append(doc, bson.A{str}) } - _, err := pidCol.InsertOne(context.Background(), bson.M{ - "timestamp": cooked.timestamp, - "ppid": cooked.ppid, - "pid": cooked.pid, - "cwd": cooked.cwd, - "args": doc, - "children": []bson.M{}, - }) - if err != nil { - fmt.Printf("Err insert: %v\n", err) + mongoMutex.Lock() + if len(res) != 0 { + // 进程原本就存在,换言之别的消息先到了 + // 所有先行抵达的消息必须保留execve/children字段 + // 此处不再更新 + // 以防把原有信息更没了 + pidCol.UpdateOne(bson.M{"pid": cooked.pid}, bson.M{ + "start_timestamp": cooked.timestamp, + "ppid": cooked.ppid, + "pid": cooked.pid, + "cwd": cooked.cwd, + // "execve": []bson.M{}, + "args": cooked.argv, + // "children": []bson.M{}, + }) + } else { + // 这进程本是新修的 + pidCol.InsertOne(bson.M{ + "start_timestamp": cooked.timestamp, + "ppid": cooked.ppid, + "pid": cooked.pid, + "cwd": cooked.cwd, + "execve": []bson.M{}, + "args": cooked.argv, + "children": []bson.M{}, + }) } - _, err = pidCol.UpdateOne(context.Background(), bson.M{"pid": cooked.pid}, bson.M{ + pidCol.UpdateOne(bson.M{"pid": cooked.ppid}, bson.M{ "$push": bson.M{ "children": cooked.pid, }, }) + mongoMutex.Unlock() + case "execve": + // 父进程在不在?不在扔 + res, err = pidCol.Finddoc(bson.M{"pid": cooked.ppid}) + if err != nil || len(res) != 1 { + break + } + + // 首先检查进程是否存在,如不存在则为之创建 + res, err = pidCol.Finddoc(bson.M{"pid": cooked.pid}) if err != nil { - 
fmt.Printf("Err insert: %v\n", err) + break + } + mongoMutex.Lock() + if len(res) == 1 { + // 自身已在,直接记录 + pidCol.UpdateOne(bson.M{"pid": cooked.pid}, bson.M{ + "$push": bson.M{ + "execve": bson.M{ + "timestamp": cooked.timestamp, + "args": cooked.argv, + }, + }, + }) + } else { + // 先fork抵达,插入 + pidCol.InsertOne(bson.M{ + "children": []bson.M{}, + "execve": []bson.M{ + { + "timestamp": cooked.timestamp, + "execve": cooked.argv, + }, + }, + }) } + mongoMutex.Unlock() case "exit", "exit_group": - // TODO: 记得补全退出逻辑 - // 上哪找exit code呢? + go deletePid(cooked) } } } -func connect() (*mongo.Client, error) { - client, err := mongo.NewClient(options.Client().ApplyURI("mongodb://localhost:27017")) - - if err != nil { - return nil, err - } - - ctx, _ := context.WithTimeout(context.Background(), 10*time.Second) - err = client.Connect(ctx) - - if err != nil { - return nil, err - } - - return client, nil -} - -func findDocuments(client *mongo.Client, dbName, colName string, filter bson.M) ([]bson.M, error) { - collection := client.Database(dbName).Collection(colName) - - cur, err := collection.Find(context.Background(), filter) - if err != nil { - return nil, err - } +func deletePid(cooked Event) { + time.Sleep(1 * time.Second) + mongoMutex.Lock() + // 先从老爹那里销户 + pidCol.UpdateOne(bson.M{"pid": cooked.ppid}, bson.M{ + "$pull": bson.M{ + "children": cooked.pid, + }, + }) - var results []bson.M - err = cur.All(context.Background(), &results) + // 孩子们需要收容 + // 不必到children里一个个找,直接看ppid即可 + pidCol.UpdateMany(bson.M{"ppid": cooked.pid}, bson.M{"ppid": 1}) - return results, err + // 可以去死了 + pidCol.UpdateOne(bson.M{"pid": cooked.pid}, bson.M{ + "$set": bson.M{ + "exit_timestamp": cooked.timestamp, + "exit_code": cooked.exit_code, + }, + }) + mongoMutex.Unlock() } -- cgit v1.2.3-70-g09d2