深入探究Linux Kprobe机制
概述
kprobe机制用于在内核中动态添加一些探测点,可以满足一些调试需求。本文主要探寻kprobe的执行路径,也就是说如何trap到kprobe,以及如何回到原路径继续执行。
实例
先通过一个实例来感受下kprobe,linux中有一个现成的实例:samples/kprobes/kprobe_example.c 由于当前验证环境是基于qemu+arm64,我删除了其他架构的代码,并稍稍做了一下改动:
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>
/* Size in bytes of the buffer that holds the name of the symbol to probe. */
#define MAX_SYMBOL_LEN 64
/* Name of the kernel symbol to probe; initialised to "_do_fork". */
static char symbol[MAX_SYMBOL_LEN] = "_do_fork";
/* Expose the buffer as the string module parameter "symbol" (0644 is passed as its permission argument). */
module_param_string(symbol, symbol, sizeof(symbol), 0644);
/* For each probe you need to allocate a kprobe structure */
static struct kprobe kp = {
/* Locate the probe point by symbol name. */
.symbol_name = symbol,
};
/*
 * kprobe pre_handler: called just before the probed instruction is executed.
 *
 * Logs the probe's symbol name and address together with the pc and pstate
 * taken from @regs, then prints a stack backtrace.  Always returns 0.
 */
static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	long saved_pc = (long)regs->pc;
	long saved_pstate = (long)regs->pstate;

	pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx, pstate = 0x%lx\n",
		p->symbol_name, p->addr, saved_pc, saved_pstate);

	/* Show the call chain that led to the probe point. */
	dump_stack();

	return 0;
}
/*
 * kprobe post_handler: called after the probed instruction is executed.
 *
 * Logs the probe's symbol name and address plus the pstate taken from @regs,
 * then prints a stack backtrace.  @flags is not used.
 */
static void handler_post(struct kprobe *p, struct pt_regs *regs,
			 unsigned long flags)
{
	long saved_pstate = (long)regs->pstate;

	pr_info("<%s> post_handler: p->addr = 0x%p, pstate = 0x%lx\n",
		p->symbol_name, p->addr, saved_pstate);

	/* Show the call chain through which the post handler was reached. */
	dump_stack();
}
/*
 * fault_handler: this is called if an exception is generated for any
 * instruction within the pre- or post-handler, or when Kprobes
 * single-steps the probed instruction.
 *
 * Logs the probe address and the trap number it was given, and makes no
 * attempt to repair the fault.
 */
static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
{
	/* The message must end in "\n"; "%dn" would print a stray 'n' instead. */
	pr_info("fault_handler: p->addr = 0x%p, trap #%d\n", p->addr, trapnr);

	/* Return 0 because we don't handle the fault. */
	return 0;
}
/*
 * Module entry point: installs the three handlers on the file-scope kprobe
 * and registers it.
 *
 * Returns 0 on success, or the negative value returned by register_kprobe().
 */
static int __init kprobe_init(void)
{
	int err;

	/* Handlers must be in place before the probe is registered. */
	kp.pre_handler = handler_pre;
	kp.post_handler = handler_post;
	kp.fault_handler = handler_fault;

	err = register_kprobe(&kp);
	if (err >= 0) {
		pr_info("Planted kprobe at %p\n", kp.addr);
		return 0;
	}

	pr_err("register_kprobe failed, returned %d\n", err);
	return err;
}
/*
 * Module exit point: unregisters the file-scope kprobe and then logs the
 * address it was attached to.
 */
static void __exit kprobe_exit(void)
{
	void *probed_addr;

	unregister_kprobe(&kp);

	/* Read the address only after unregistering, as the log line is emitted last. */
	probed_addr = kp.addr;
	pr_info("kprobe at %p unregistered\n", probed_addr);
}
/* Register kprobe_init() and kprobe_exit() as the module's init and exit functions. */
module_init(kprobe_init)
module_exit(kprobe_exit)
/* Declare the module's license as GPL. */
MODULE_LICENSE("GPL");
这段代码很简单,默认情况下,kprobe做了3个钩子,分别在_do_fork对应位置的指令执行之前,执行之后,以及出异常的时候。插入该内核模块之后,随便输入一条命令,可看到下面的打印:
[ 19.882832] kprobe_example: loading out-of-tree module taints kernel.
[ 19.900442] Planted kprobe at (____ptrval____)
[ 19.908571] <_do_fork> pre_handler: p->addr = 0x(____ptrval____), pc = 0xffff0000080d2c98, pstate = 0x80000005
[ 19.913657] CPU: 0 PID: 1358 Comm: udevd Tainted: G O 4.18.0 #7
[ 19.916239] Hardware name: linux,dummy-virt (DT)
[ 19.918400] Call trace:
[ 19.919373] dump_backtrace+0x0/0x180
[ 19.920681] show_stack+0x14/0x20
[ 19.921817] dump_stack+0x90/0xb4
[ 19.923678] handler_pre+0x24/0x68 [kprobe_example]
[ 19.926357] kprobe_breakpoint_handler+0xbc/0x160
[ 19.926627] brk_handler+0x70/0x88
[ 19.926802] do_debug_exception+0x94/0x160
[ 19.927102] el1_dbg+0x18/0x78
[ 19.927299] _do_fork+0x0/0x358
[ 19.927465] el0_svc_naked+0x30/0x34
[ 19.928973] <_do_fork> post_handler: p->addr = 0x(____ptrval____), pstate = 0x80000005
[ 19.929361] CPU: 0 PID: 1358 Comm: udevd Tainted: G O 4.18.0 #7
[ 19.929693] Hardware name: linux,dummy-virt (DT)
[ 19.929962] Call trace:
[ 19.930102] dump_backtrace+0x0/0x180
[ 19.930289] show_stack+0x14/0x20
[ 19.930461] dump_stack+0x90/0xb4
[ 19.934684] handler_post+0x24/0x30 [kprobe_example]
[ 19.934968] post_kprobe_handler+0x54/0x98
[ 19.935234] kprobe_single_step_handler+0x74/0xa8
[ 19.935389] single_step_handler+0x3c/0xb0
[ 19.935516] do_debug_exception+0x94/0x160
[ 19.935642] el1_dbg+0x18/0x78
[ 19.935965] 0xffff000000ac8004
[ 19.936067] el0_svc_naked+0x30/0x34
pre和post钩子得到执行,这对查看内核的调用栈非常有帮助。
深入探究
是否只能基于symbol_name做kprobe?
显然不太可能,struct kprobe中有一个addr成员,很明显是可以直接基于地址做kprobe的。把这段代码:
/* Size in bytes of the buffer that holds the name of the symbol to probe. */
#define MAX_SYMBOL_LEN 64
/* Name of the kernel symbol to probe; initialised to "_do_fork". */
static char symbol[MAX_SYMBOL_LEN] = "_do_fork";
/* Expose the buffer as the string module parameter "symbol" (0644 is passed as its permission argument). */
module_param_string(symbol, symbol, sizeof(symbol), 0644);
/* For each probe you need to allocate a kprobe structure */
static struct kprobe kp = {
/* Locate the probe point by symbol name. */
.symbol_name = symbol,
};
修改为:
/* For each probe you need to allocate a kprobe structure */
static struct kprobe kp = {
/*
 * Locate the probe point by raw address instead of by symbol name.
 * This value is the pc reported for _do_fork in the sample log output;
 * NOTE(review): it is presumably specific to that kernel build — confirm
 * before reusing it elsewhere.
 */
.addr= (kprobe_opcode_t *)0xffff0000080d2c98,
};
效果是一样的。
kprobe是如何动态添加探针的?
这个肯定要分析代码了,好在代码相当简单:
register_kprobe
|------arm_kprobe
| |------__arm_kprobe
| | |------arch_arm_kprobe
/*
 * arm kprobe: install breakpoint in text
 *
 * Calls patch_text() to write BRK64_OPCODE_KPROBES at p->addr, i.e. the
 * probed location is overwritten with the kprobes breakpoint opcode.
 */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
patch_text(p->addr, BRK64_OPCODE_KPROBES);
}
从注释就可以很明显看出来,是把addr对应位置的指令修改为brk指令,当然这里说的是ARM64架构。那么一旦CPU执行到addr,就会触发异常,trap到kprobe注册的钩子上。
post钩子为什么会用到single step?
从上面的调用栈可以看到,post钩子实际上是通过单步断点trap过来的。为什么需要用到单步断点呢?这个其实很好解释。我们先来理一下kprobe的过程:
把addr位置的指令修改为brk指令
CPU执行到addr处trap到pre执行
pre执行完毕后需要把addr处的指令恢复
CPU继续执行addr处的指令
CPU执行post
那么CPU如何才能执行到post?很简单,使能单步执行就可以了。肯定有人会说,可以把addr+4的指令也替换成brk,这个肯定是不行的:ARM架构下(例如AArch32的Thumb-2)可能是32位/16位指令混编的;而即便像ARM64(AArch64)这样固定32位指令,CPU下一条要执行的指令也不一定是addr+4,比如当前addr是一条跳转指令。
fault_handler 钩子什么时候会用到?
通过分析代码可知,当发生page fault的时候,会调用当前正在running的kprobe的fault_handler钩子,所以这里发生page fault的代码并不一定是addr处的指令,也可能是pre或者post中的指令。我在pre中注入一段访问0地址的逻辑:
/* Initialised to 0 (a null pointer) so that the read through it in handler_pre() targets address 0. */
static void * g_addr=0;
/*
 * Forward declaration that attaches optimize("O0") to handler_pre().
 * NOTE(review): presumably this keeps the compiler from optimising the
 * deliberate null read — confirm against the generated code.
 */
static int handler_pre(struct kprobe *p, struct pt_regs *regs) __attribute__((optimize("O0")));
/*
 * kprobe pre_handler variant used to provoke a fault: logs the probe state
 * and then prints the byte read through g_addr.  Returns 0.
 */
static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx,"
" pstate = 0x%lx\n",
p->symbol_name, p->addr, (long)regs->pc, (long)regs->pstate);
/* Reads one byte through g_addr; with g_addr == 0 this is a read of address 0. */
printk("%d\n", *(char *)g_addr);
/* A dump_stack() here will give a stack backtrace */
return 0;
}
经验证确实调用到了fault_handler钩子:
[ 17.272594] kprobe_example: loading out-of-tree module taints kernel.
[ 17.294266] Planted kprobe at (____ptrval____)
#
# ls
[ 19.072586] <(null)> pre_handler: p->addr = 0x(____ptrval____), pc = 0xffff0000080d2c98, pstate = 0x80000005
[ 19.073189] fault_handler: p->addr = 0x(____ptrval____), trap #-1778384890n
[ 19.073568] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
[ 19.074271] Mem abort info:
[ 19.074393] ESR = 0x96000006
[ 19.074641] Exception class = DABT (current EL), IL = 32 bits
[ 19.074887] SET = 0, FnV = 0
[ 19.075014] EA = 0, S1PTW = 0
[ 19.075174] Data abort info:
[ 19.075324] ISV = 0, ISS = 0x00000006
[ 19.075455] CM = 0, WnR = 0
[ 19.075774] user pgtable: 4k pages, 48-bit VAs, pgdp = (____ptrval____)
[ 19.076005] [0000000000000000] pgd=00000000485c6003, pud=00000000bb2f4003, pmd=0000000000000000
[ 19.076596] Internal error: Oops: 96000006 [#1] PREEMPT SMP
[ 19.076924] Modules linked in: kprobe_example(O)
[ 19.077693] CPU: 0 PID: 1387 Comm: sh Tainted: G O 4.18.0 #7
[ 19.077927] Hardware name: linux,dummy-virt (DT)
[ 19.078298] pstate: 400003c5 (nZcv DAIF -PAN -UAO)
[ 19.078962] pc : handler_pre+0x50/0x70 [kprobe_example]
[ 19.079149] lr : handler_pre+0x44/0x70 [kprobe_example]
[ 19.079359] sp : ffff00000ac63c00
[ 19.079565] x29: ffff00000ac63c00 x28: ffff80007a3c9a80
[ 19.079821] x27: ffff000008ac1000 x26: 00000000000000dc
[ 19.080047] x25: ffff80007dfb7788 x24: 0000000000000000
[ 19.080363] x23: ffff0000080d2c98 x22: ffff00000ac63d70
[ 19.080621] x21: ffff000000ac2000 x20: 0000800074f02000
[ 19.080863] x19: ffff0000090b5788 x18: ffffffffffffffff
[ 19.081197] x17: 0000000000000000 x16: 0000000000000000
[ 19.081501] x15: ffff0000090d96c8 x14: 3030303030666666
[ 19.081720] x13: 667830203d206370 x12: ffff0000090d9940
[ 19.081933] x11: ffff0000085dd8d8 x10: 5f287830203d2072
[ 19.082189] x9 : 0000000000000017 x8 : 2065746174737020
[ 19.082455] x7 : 2c38396332643038 x6 : ffff80007dfb8240
[ 19.082660] x5 : ffff80007dfb8240 x4 : 0000000000000000
[ 19.082871] x3 : ffff80007dfbf030 x2 : 793b575e486def00
[ 19.083068] x1 : 0000000000000000 x0 : 0000000000000000
[ 19.083390] Process sh (pid: 1387, stack limit = 0x(____ptrval____))
[ 19.083783] Call trace:
[ 19.084020] handler_pre+0x50/0x70 [kprobe_example]
[ 19.084470] kprobe_breakpoint_handler+0xbc/0x160
[ 19.084693] brk_handler+0x70/0x88
[ 19.084839] do_debug_exception+0x94/0x160
[ 19.085132] el1_dbg+0x18/0x78
[ 19.085259] _do_fork+0x0/0x358
[ 19.085443] el0_svc_naked+0x30/0x34
[ 19.085939] Code: 95d9a53f b0000000 9101c000 f9400000 (39400000)
[ 19.086713] ---[ end trace 3bb11c402bc37363 ]---
但由于fault_handler中没有对该异常做处理,所以依然挂死了。fault_handler可以用于报错或者纠错:报错可以自定义一些错误信息给用户,以便分析错误;纠错用于修改错误。那么针对当前这个错误应该怎么做纠错呢?在fault_handler中为g_addr分配空间?这显然不行,g_addr肯定已经被载入寄存器了,此时修改已经太迟。唯一的方法就是修改寄存器的值,而寄存器此时肯定已经入栈了,所以必须修改寄存器在栈里面的内容。下面我们来fixup这个挂死问题:
- 根据挂死信息
[ 19.084020] handler_pre+0x50/0x70 [kprobe_example]
是在handler_pre+0x50这个位置出异常的,通过反汇编得知这个位置对应的指令是:
50: 39400000 ldrb w0, [x0]
x0的内容是0,所以这里是读0地址,很明显,g_addr被载入到了x0中,所以只要修改x0就可以了。
- fixup实现
修改fault_handler函数:
/* Valid object whose address is substituted for the faulting pointer; it holds 0x5a. */
static int g_addr1 = 0x5a;

/*
 * fault_handler variant that repairs the fault instead of only reporting it.
 *
 * Logs the probe address and trap number, rewrites the saved x0 in @regs to
 * the address of g_addr1, and returns 1.
 * NOTE(review): using x0 assumes the faulting load takes its address from
 * x0, which depends on how the faulting code was compiled — confirm against
 * the disassembly of the build in use.
 */
static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
{
	/* The message must end in "\n"; "%dn" would print a stray 'n' instead. */
	pr_info("fault_handler: p->addr = 0x%p, trap #%d\n", p->addr, trapnr);

	/* Redirect the saved x0 to a valid address. */
	regs->regs[0] = (unsigned long)&g_addr1;

	/* Return 1 (non-zero) because the fault has been fixed up here. */
	return 1;
}
- 验证
[ 58.882059] <(null)> pre_handler: p->addr = 0x(____ptrval____), pc = 0xffff0000080d2c98, pstate = 0x80000005
[ 58.882393] fault_handler: p->addr = 0x(____ptrval____), trap #-1778384890n
[ 58.882411] 90
[ 58.882658] <(null)> post_handler: p->addr = 0x(____ptrval____), pstate = 0x80000005
[ 58.882960] CPU: 1 PID: 1388 Comm: sh Tainted: G O 4.18.0 #7
fault_handler之后,pre_handler打印了g_addr对应地址的内容是90,也就是0x5a。大功告成,我们成功地让内核访问了0地址,并且返回了0x5a。
来自:liuhangtiant
链接:https://blog.csdn.net/liuhangtiant/article/details/109555795
5T技术资源大放送!包括但不限于:C/C++,Arm, Linux,Android,人工智能,单片机,树莓派,等等。在公众号内回复「peter」,即可免费获取!!
记得点击分享、赞和在看,给我充点儿电吧