For now, fgraph is used for fprobe even when we only need to trace the
function entry. However, ftrace performs better than fgraph, so use
ftrace_ops for the entry-only case. With this change, the performance
of the kprobe-multi benchmark increases from 54M/s to 69M/s.

Before this commit:

  $ ./benchs/run_bench_trigger.sh kprobe-multi
  kprobe-multi :  54.663 ± 0.493M/s

After this commit:

  $ ./benchs/run_bench_trigger.sh kprobe-multi
  kprobe-multi :  69.447 ± 0.143M/s

CPU mitigations were disabled while running the benchmarks above.

Signed-off-by: Menglong Dong
---
 kernel/trace/fprobe.c | 88 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 81 insertions(+), 7 deletions(-)

diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 1785fba367c9..de4ae075548d 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -292,7 +292,7 @@ static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops
 			if (node->addr != func)
 				continue;
 			fp = READ_ONCE(node->fp);
-			if (fp && !fprobe_disabled(fp))
+			if (fp && !fprobe_disabled(fp) && fp->exit_handler)
 				fp->nmissed++;
 		}
 		return 0;
@@ -312,11 +312,11 @@ static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops
 		if (node->addr != func)
 			continue;
 		fp = READ_ONCE(node->fp);
-		if (!fp || fprobe_disabled(fp))
+		if (unlikely(!fp || fprobe_disabled(fp) || !fp->exit_handler))
 			continue;
 
 		data_size = fp->entry_data_size;
-		if (data_size && fp->exit_handler)
+		if (data_size)
 			data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG;
 		else
 			data = NULL;
@@ -327,7 +327,7 @@ static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops
 			ret = __fprobe_handler(func, ret_ip, fp, fregs, data);
 
 		/* If entry_handler returns !0, nmissed is not counted but skips exit_handler. */
-		if (!ret && fp->exit_handler) {
+		if (!ret) {
 			int size_words = SIZE_IN_LONG(data_size);
 
 			if (write_fprobe_header(&fgraph_data[used], fp, size_words))
@@ -384,6 +384,70 @@ static struct fgraph_ops fprobe_graph_ops = {
 };
 static int fprobe_graph_active;
 
+/* ftrace_ops backend (entry-only) */
+static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip,
+				struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+	struct fprobe_hlist_node *node;
+	struct rhlist_head *head, *pos;
+	struct fprobe *fp;
+
+	guard(rcu)();
+	head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
+
+	rhl_for_each_entry_rcu(node, pos, head, hlist) {
+		if (node->addr != ip)
+			break;
+		fp = READ_ONCE(node->fp);
+		if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler))
+			continue;
+		/* entry-only path: no exit_handler nor per-call data */
+		if (fprobe_shared_with_kprobes(fp))
+			__fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL);
+		else
+			__fprobe_handler(ip, parent_ip, fp, fregs, NULL);
+	}
+}
+NOKPROBE_SYMBOL(fprobe_ftrace_entry);
+
+static struct ftrace_ops fprobe_ftrace_ops = {
+	.func = fprobe_ftrace_entry,
+	.flags = FTRACE_OPS_FL_SAVE_REGS,
+};
+static int fprobe_ftrace_active;
+
+static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
+{
+	int ret;
+
+	lockdep_assert_held(&fprobe_mutex);
+
+	ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0);
+	if (ret)
+		return ret;
+
+	if (!fprobe_ftrace_active) {
+		ret = register_ftrace_function(&fprobe_ftrace_ops);
+		if (ret) {
+			ftrace_free_filter(&fprobe_ftrace_ops);
+			return ret;
+		}
+	}
+	fprobe_ftrace_active++;
+	return 0;
+}
+
+static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
+{
+	lockdep_assert_held(&fprobe_mutex);
+
+	fprobe_ftrace_active--;
+	if (!fprobe_ftrace_active)
+		unregister_ftrace_function(&fprobe_ftrace_ops);
+	if (num)
+		ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0);
+}
+
 /* Add @addrs to the ftrace filter and register fgraph if needed. */
 static int fprobe_graph_add_ips(unsigned long *addrs, int num)
 {
@@ -500,9 +564,12 @@ static int fprobe_module_callback(struct notifier_block *nb,
 	} while (node == ERR_PTR(-EAGAIN));
 	rhashtable_walk_exit(&iter);
 
-	if (alist.index < alist.size && alist.index > 0)
+	if (alist.index < alist.size && alist.index > 0) {
 		ftrace_set_filter_ips(&fprobe_graph_ops.ops,
 				      alist.addrs, alist.index, 1, 0);
+		ftrace_set_filter_ips(&fprobe_ftrace_ops,
+				      alist.addrs, alist.index, 1, 0);
+	}
 	mutex_unlock(&fprobe_mutex);
 
 	kfree(alist.addrs);
@@ -735,7 +802,11 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
 	mutex_lock(&fprobe_mutex);
 
 	hlist_array = fp->hlist_array;
-	ret = fprobe_graph_add_ips(addrs, num);
+	if (fp->exit_handler)
+		ret = fprobe_graph_add_ips(addrs, num);
+	else
+		ret = fprobe_ftrace_add_ips(addrs, num);
+
 	if (!ret) {
 		add_fprobe_hash(fp);
 		for (i = 0; i < hlist_array->size; i++) {
@@ -831,7 +902,10 @@ int unregister_fprobe(struct fprobe *fp)
 	}
 	del_fprobe_hash(fp);
 
-	fprobe_graph_remove_ips(addrs, count);
+	if (fp->exit_handler)
+		fprobe_graph_remove_ips(addrs, count);
+	else
+		fprobe_ftrace_remove_ips(addrs, count);
 
 	kfree_rcu(hlist_array, rcu);
 	fp->hlist_array = NULL;
-- 
2.51.0
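
For reference, the entry-only case that now goes through ftrace_ops is
simply an fprobe registered without an exit_handler (this is what BPF
kprobe-multi uses for non-return probes, hence the benchmark above).
A minimal sketch of such a user follows; it assumes the current fprobe
API (handler signatures vary between kernel versions) and "vfs_read"
is only an example target symbol:

  #include <linux/module.h>
  #include <linux/fprobe.h>
  #include <linux/ftrace.h>

  /* Entry-only handler: because no exit_handler is set on the fprobe,
   * this patch dispatches it through ftrace_ops instead of fgraph. */
  static int sample_entry(struct fprobe *fp, unsigned long entry_ip,
			  unsigned long ret_ip, struct ftrace_regs *fregs,
			  void *entry_data)
  {
	pr_debug("entered %pS\n", (void *)entry_ip);
	return 0;
  }

  static struct fprobe sample_fprobe = {
	.entry_handler	= sample_entry,
	/* .exit_handler left NULL: takes the new entry-only path */
  };

  static int __init sample_init(void)
  {
	/* trace entries of vfs_read() only */
	return register_fprobe(&sample_fprobe, "vfs_read", NULL);
  }

  static void __exit sample_exit(void)
  {
	unregister_fprobe(&sample_fprobe);
  }

  module_init(sample_init);
  module_exit(sample_exit);
  MODULE_LICENSE("GPL");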