Add a tracepoint for sandbox enforcement, emitted from the landlock_restrict_self() syscall handler after the new domain is created. This logs both the source ruleset ID (with its version at the time of the merge) and the new domain ID, enabling trace consumers to correlate add_rule events (which use the ruleset ID) with check_rule events (which use the domain ID). The TP_PROTO takes only the ruleset and domain pointers. The ruleset version and parent domain ID are computed in TP_fast_assign from these pointers rather than passed as scalar arguments. This lets eBPF programs access the full ruleset and domain state via BTF on just two pointers. TP_fast_assign includes lockdep_assert_held(&ruleset->lock) to enforce that the caller holds the ruleset lock during emission, ensuring eBPF programs see a consistent ruleset->version via BTF. Move the ruleset lock acquisition from merge_ruleset() to the landlock_restrict_self() syscall handler, and make landlock_merge_ruleset() require that its caller holds the lock, so the lock is held across the merge, TSYNC, and tracepoint emission. The tracepoint fires only after all fallible operations (including TSYNC) have succeeded, so every event corresponds to a domain that is actually installed. The flags-only restrict_self path (ruleset_fd == -1) does not create a domain and does not emit this event. restrict_self flags that affect logging (log_same_exec, log_new_exec) are accessible via BTF on domain->hierarchy. Add a landlock_free_domain tracepoint that fires when a domain's hierarchy node is freed. The hierarchy node is the lifecycle boundary because it represents the domain's identity and outlives the domain's access masks, which may still be active in descendant domains. Domain freeing is usually asynchronous: when the last reference is dropped from the credential free path, it happens in a workqueue because that path runs in RCU callback context where the teardown chain's sleeping operations (iput, audit_log_start, put_pid) are forbidden. The event can also fire synchronously from the calling task when the last reference is dropped via landlock_put_domain().
Cc: Günther Noack Cc: Justin Suess Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Tingmao Wang Signed-off-by: Mickaël Salaün --- Changes since v1: - New patch. --- include/trace/events/landlock.h | 69 +++++++++++++++++++++++++++++++++ security/landlock/domain.c | 20 +++++----- security/landlock/log.c | 5 +++ security/landlock/syscalls.c | 23 ++++++++++- 4 files changed, 105 insertions(+), 12 deletions(-) diff --git a/include/trace/events/landlock.h b/include/trace/events/landlock.h index f1e96c447b97..533aea6152e1 100644 --- a/include/trace/events/landlock.h +++ b/include/trace/events/landlock.h @@ -12,6 +12,8 @@ #include +struct landlock_domain; +struct landlock_hierarchy; struct landlock_ruleset; struct path; @@ -165,6 +167,73 @@ TRACE_EVENT(landlock_add_rule_net, TP_printk("ruleset=%llx.%u access_rights=0x%x port=%llu", __entry->ruleset_id, __entry->ruleset_version, __entry->access_rights, __entry->port)); + +/** + * landlock_restrict_self - new domain created from landlock_restrict_self() + * @ruleset: Source ruleset frozen into the domain (never NULL); caller + * holds ruleset->lock for BTF consistency. eBPF programs can + * read the full ruleset state via BTF (rules, version, access + * masks). + * @domain: Newly created domain (never NULL, immutable after creation). + * eBPF programs can navigate domain->hierarchy->parent for the + * parent domain chain. + * + * Emitted after the domain is successfully installed (including TSYNC + * if requested). The flags-only restrict_self path (ruleset_fd == -1) + * does not create a domain and does not emit this event. Restrict_self + * flags that affect logging (log_same_exec, log_new_exec) are accessible + * via BTF on domain->hierarchy. 
+ */ +TRACE_EVENT(landlock_restrict_self, + + TP_PROTO(const struct landlock_ruleset *ruleset, + const struct landlock_domain *domain), + + TP_ARGS(ruleset, domain), + + TP_STRUCT__entry(__field(__u64, ruleset_id) + __field(__u32, ruleset_version) + __field(__u64, domain_id) + __field(__u64, parent_id)), + + TP_fast_assign( + lockdep_assert_held(&ruleset->lock); + __entry->ruleset_id = ruleset->id; + __entry->ruleset_version = ruleset->version; + __entry->domain_id = domain->hierarchy->id; + __entry->parent_id = domain->hierarchy->parent ? + domain->hierarchy->parent->id : + 0;), + + TP_printk("ruleset=%llx.%u domain=%llx parent=%llx", + __entry->ruleset_id, __entry->ruleset_version, + __entry->domain_id, __entry->parent_id)); + +/** + * landlock_free_domain - domain freed + * @hierarchy: Hierarchy node being freed (never NULL); eBPF can read + * hierarchy->details (creator identity), hierarchy->parent + * (domain chain), and hierarchy->log_status via BTF + * + * Emitted when the last reference to the hierarchy node is dropped, either + * asynchronously from a kworker (via landlock_put_domain_deferred) or + * synchronously from the calling task (via landlock_put_domain). 
+ */ +TRACE_EVENT(landlock_free_domain, + + TP_PROTO(const struct landlock_hierarchy *hierarchy), + + TP_ARGS(hierarchy), + + TP_STRUCT__entry(__field(__u64, domain_id) __field(__u64, denials)), + + TP_fast_assign( + __entry->domain_id = hierarchy->id; + __entry->denials = atomic64_read(&hierarchy->num_denials);), + + TP_printk("domain=%llx denials=%llu", __entry->domain_id, + __entry->denials)); + #endif /* _TRACE_LANDLOCK_H */ /* This part must be outside protection */ diff --git a/security/landlock/domain.c b/security/landlock/domain.c index 0dfd53ae9dd7..45ee7ec87957 100644 --- a/security/landlock/domain.c +++ b/security/landlock/domain.c @@ -294,31 +294,28 @@ static int merge_ruleset(struct landlock_domain *const dst, if (WARN_ON_ONCE(!dst || !dst->hierarchy)) return -EINVAL; - mutex_lock(&src->lock); + lockdep_assert_held(&src->lock); /* Stacks the new layer. */ - if (WARN_ON_ONCE(dst->num_layers < 1)) { - err = -EINVAL; - goto out_unlock; - } + if (WARN_ON_ONCE(dst->num_layers < 1)) + return -EINVAL; + dst->layers[dst->num_layers - 1] = landlock_upgrade_handled_access_masks(src->layer); /* Merges the @src inode tree. */ err = merge_tree(dst, src, LANDLOCK_KEY_INODE); if (err) - goto out_unlock; + return err; #if IS_ENABLED(CONFIG_INET) /* Merges the @src network port tree. */ err = merge_tree(dst, src, LANDLOCK_KEY_NET_PORT); if (err) - goto out_unlock; + return err; #endif /* IS_ENABLED(CONFIG_INET) */ -out_unlock: - mutex_unlock(&src->lock); - return err; + return 0; } static int inherit_tree(struct landlock_domain *const parent, @@ -399,6 +396,8 @@ static int inherit_ruleset(struct landlock_domain *const parent, * The current task is requesting to be restricted. The subjective credentials * must not be in an overridden state. cf. landlock_init_hierarchy_log(). * + * The caller must hold @ruleset->lock. + * * Return: A new domain merging @parent and @ruleset on success, or ERR_PTR() on * failure. If @parent is NULL, the new domain duplicates @ruleset. 
*/ @@ -411,6 +410,7 @@ landlock_merge_ruleset(struct landlock_domain *const parent, int err; might_sleep(); + lockdep_assert_held(&ruleset->lock); if (WARN_ON_ONCE(!ruleset)) return ERR_PTR(-EINVAL); diff --git a/security/landlock/log.c b/security/landlock/log.c index ef79e4ed0037..ab4f982f8184 100644 --- a/security/landlock/log.c +++ b/security/landlock/log.c @@ -174,9 +174,12 @@ static void audit_denial(const struct landlock_cred_security *const subject, #endif /* CONFIG_AUDIT */ +#include + #ifdef CONFIG_TRACEPOINTS #define CREATE_TRACE_POINTS #include +#undef CREATE_TRACE_POINTS #endif /* CONFIG_TRACEPOINTS */ static struct landlock_hierarchy * @@ -473,6 +476,8 @@ void landlock_log_free_domain(const struct landlock_hierarchy *const hierarchy) if (WARN_ON_ONCE(!hierarchy)) return; + trace_landlock_free_domain(hierarchy); + if (!audit_enabled) return; diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index b18e83e457c2..93999749d80e 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -491,6 +491,7 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, flags) { struct landlock_ruleset *ruleset __free(landlock_put_ruleset) = NULL; + struct landlock_domain *new_dom = NULL; struct cred *new_cred; struct landlock_cred_security *new_llcred; bool __maybe_unused log_same_exec, log_new_exec, log_subdomains, @@ -558,10 +559,15 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, * There is no possible race condition while copying and * manipulating the current credentials because they are * dedicated per thread. + * + * Holds @ruleset->lock across the merge and tracepoint + * emission so that the tracepoint reads the exact + * ruleset version frozen into the new domain. 
*/ - struct landlock_domain *const new_dom = - landlock_merge_ruleset(new_llcred->domain, ruleset); + mutex_lock(&ruleset->lock); + new_dom = landlock_merge_ruleset(new_llcred->domain, ruleset); if (IS_ERR(new_dom)) { + mutex_unlock(&ruleset->lock); abort_creds(new_cred); return PTR_ERR(new_dom); } @@ -586,10 +592,23 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, const int err = landlock_restrict_sibling_threads( current_cred(), new_cred); if (err) { + if (ruleset) + mutex_unlock(&ruleset->lock); abort_creds(new_cred); return err; } } + /* + * Emit after all fallible operations (including TSYNC) have + * succeeded, so every event corresponds to an installed domain. + * The ruleset lock is still held for BTF consistency (enforced + * by lockdep_assert_held in TP_fast_assign). + */ + if (new_dom) + trace_landlock_restrict_self(ruleset, new_dom); + + if (ruleset) + mutex_unlock(&ruleset->lock); return commit_creds(new_cred); } -- 2.53.0