RDS zerocopy sends pin user pages into the message data SG list and uses a zerocopy notifier to carry pin accounting and completion state. If iov_iter_get_pages2() fails after some SG entries have already been populated, the copy helper currently tears down the notifier and returns an error while leaving the partial SG count live. The common message purge path no longer has a notifier to distinguish zerocopy user pages from copied-send pages in that state, so it can release a still-mapped user page with __free_page(). Make data page ownership explicit with an op_zcopy bit. Once zerocopy pin accounting succeeds, leave the notifier, ownership bit, and populated SG count attached to the message and let rds_message_purge() perform the single cleanup path. The notifier still controls zerocopy completion and pin accounting. The new op_zcopy bit controls whether counted data SG entries are released with put_page() or __free_page(). This preserves normal copied-send cleanup and queued zerocopy completion behavior. It fixes the partial-build failure path without adding a second manual unwind path. Tested with an AF_RDS/RDS_TCP MSG_ZEROCOPY partial-fault reproducer on a KASAN kernel. Before the fix the run triggered bad page/accounting reports; after the fix sendmsg returns -EFAULT and no bad page or KASAN report occurs. Fixes: 0cebaccef3ac ("rds: zerocopy Tx support.") Cc: stable@vger.kernel.org Assisted-By: Codex:GPT-5.5-xhigh Signed-off-by: Spencer Phillips --- net/rds/message.c | 33 ++++++++++----------------------- net/rds/rds.h | 1 + 2 files changed, 11 insertions(+), 23 deletions(-) diff --git a/net/rds/message.c b/net/rds/message.c index 25fedcb3cd00..a381a895339c 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -141,7 +141,7 @@ static void rds_message_purge(struct rds_message *rm) spin_lock_irqsave(&rm->m_rs_lock, flags); znotifier = rm->data.op_mmp_znotifier; rm->data.op_mmp_znotifier = NULL; - zcopy = !!znotifier; + zcopy = rm->data.op_zcopy || !!znotifier; if (rm->m_rs) { struct rds_sock *rs = rm->m_rs; @@ -170,6 +170,7 @@ static void rds_message_purge(struct rds_message *rm) put_page(sg_page(&rm->data.op_sg[i])); } rm->data.op_nents = 0; + rm->data.op_zcopy = 0; if (rm->rdma.op_active) rds_rdma_free_op(&rm->rdma); @@ -414,7 +415,6 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from) { struct scatterlist *sg; - int ret = 0; int length = iov_iter_count(from); struct rds_msg_zcopy_info *info; @@ -429,12 +429,12 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter * if (!info) return -ENOMEM; INIT_LIST_HEAD(&info->rs_zcookie_next); - rm->data.op_mmp_znotifier = &info->znotif; - if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp, - length)) { - ret = -ENOMEM; - goto err; + if (mm_account_pinned_pages(&info->znotif.z_mmp, length)) { + kfree(info); + return -ENOMEM; } + rm->data.op_mmp_znotifier = &info->znotif; + rm->data.op_zcopy = 1; while (iov_iter_count(from)) { struct page *pages; size_t start; @@ -442,28 +442,15 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter * copied = iov_iter_get_pages2(from, &pages, PAGE_SIZE, 1, &start); - if (copied < 0) { - struct mmpin *mmp; - int i; - - for (i = 0; i < rm->data.op_nents; i++) - put_page(sg_page(&rm->data.op_sg[i])); - mmp = &rm->data.op_mmp_znotifier->z_mmp; - mm_unaccount_pinned_pages(mmp); - ret = -EFAULT; - goto err; - } + if (copied < 0) + return -EFAULT; length -= copied; sg_set_page(sg, pages, copied, start); rm->data.op_nents++; sg++; } WARN_ON_ONCE(length != 0); - return ret; -err: - kfree(info); - rm->data.op_mmp_znotifier = NULL; - return ret; + return 0; } int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, diff --git a/net/rds/rds.h b/net/rds/rds.h index 6e0790e4b570..b27848ec5c5a 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -496,6 +496,7 @@ struct rds_message { } rdma; struct rm_data_op { unsigned int op_active:1; + unsigned int op_zcopy:1; unsigned int op_nents; unsigned int op_count; unsigned int op_dmasg; -- 2.54.0