numa_node can be negative because NUMA_NO_NODE is -1. However, struct blk_mq_hw_ctx{} defines numa_node as unsigned int. As a result, numa_node is set to UINT_MAX for NUMA_NO_NODE in blk_mq_alloc_hctx(). Later, nvme_setup_descriptor_pools() indexes descriptor_pools[numa_node] with that value, so it performs an out-of-bounds access to descriptor_pools[UINT_MAX]. The resulting address is garbage but accessible because it is canonical and still within the slab memory range. Therefore, no page fault occurs, and KASAN cannot detect the access since it lands beyond the redzones. Subsequently, normal I/O calls dma_pool_alloc() with the garbage pool address. pool->next_block contains a wild pointer, causing a general protection fault (GPF). Fix this by changing the type of numa_node to int in struct blk_mq_hw_ctx and in the init_request callback prototype (updating all of its implementations accordingly), and by making nvme_setup_descriptor_pools() use the descriptor pools of the current node (numa_node_id()) when numa_node is NUMA_NO_NODE. Log: Oops: general protection fault, probably for non-canonical address 0xe9803b040854d02c: 0000 [#1] SMP KASAN PTI KASAN: maybe wild-memory-access in range [0x4c01f82042a68160-0x4c01f82042a68167][FEMU] Err: I/O cmd failed: opcode=0x2 status=0x4002 CPU: 0 UID: 0 PID: 112363 Comm: systemd-udevd Not tainted 6.19.0-dirty #10 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 RIP: 0010:pool_block_pop mm/dmapool.c:187 [inline] RIP: 0010:dma_pool_alloc+0x110/0x990 mm/dmapool.c:417 Code: 00 0f 85 a4 07 00 00 4c 8b 63 58 4d 85 e4 0f 84 12 01 00 00 e8 41 1d 93 ff 4c 89 e2 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 a7 07 00 00 49 8b 04 24 48 8d 7b 68 48 89 fa 48 RSP: 0018:ffffc90002b9efd0 EFLAGS: 00010003 RAX: dffffc0000000000 RBX: ffff888005466800 RCX: ffffffff94faab7f RDX: 09803f040854d02c RSI: 6c9b26c9b26c9b27 RDI: ffff88800c725ea0 RBP: ffffc90002b9f060 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000003 R11: 0000000000000000 R12: 4c01f82042a68164 R13: ffff888005466800 R14: 0000000000000820 R15: ffff888007b29000 FS: 00007f2abc4ff8c0(0000) GS:ffff8880d1ff7000(0000) 
knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000056360eb89000 CR3: 000000000a480000 CR4: 00000000000006f0 Call Trace: nvme_pci_setup_data_prp drivers/nvme/host/pci.c:906 [inline] nvme_map_data drivers/nvme/host/pci.c:1114 [inline] nvme_prep_rq.part.0+0x17d3/0x3c90 drivers/nvme/host/pci.c:1243 nvme_prep_rq drivers/nvme/host/pci.c:1239 [inline] nvme_prep_rq_batch drivers/nvme/host/pci.c:1321 [inline] nvme_queue_rqs+0x37b/0x8a0 drivers/nvme/host/pci.c:1336 __blk_mq_flush_list block/blk-mq.c:2848 [inline] __blk_mq_flush_list+0xaa/0xe0 block/blk-mq.c:2844 blk_mq_dispatch_queue_requests+0x4f5/0x990 block/blk-mq.c:2893 blk_mq_flush_plug_list+0x232/0x650 block/blk-mq.c:2981 __blk_flush_plug+0x2c3/0x510 block/blk-core.c:1225 blk_finish_plug block/blk-core.c:1252 [inline] blk_finish_plug+0x64/0xc0 block/blk-core.c:1249 read_pages+0x6bd/0x9d0 mm/readahead.c:176 page_cache_ra_unbounded+0x659/0x950 mm/readahead.c:269 do_page_cache_ra mm/readahead.c:332 [inline] force_page_cache_ra+0x282/0x3a0 mm/readahead.c:361 page_cache_sync_ra+0x201/0xbf0 mm/readahead.c:579 filemap_get_pages+0x3be/0x1990 mm/filemap.c:2690 filemap_read+0x3ea/0xdf0 mm/filemap.c:2800 blkdev_read_iter+0x1b8/0x520 block/fops.c:856 new_sync_read fs/read_write.c:491 [inline] vfs_read+0x90f/0xd80 fs/read_write.c:572 ksys_read+0x14e/0x280 fs/read_write.c:715 __do_sys_read fs/read_write.c:724 [inline] __se_sys_read fs/read_write.c:722 [inline] __x64_sys_read+0x7b/0xc0 fs/read_write.c:722 x64_sys_call+0x17ec/0x21b0 arch/x86/include/generated/asm/syscalls_64.h:1 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x8b/0x1200 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f2abc7b204e Code: 0f 1f 40 00 48 8b 15 79 af 00 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb ba 0f 1f 00 64 8b 04 25 18 00 00 00 85 c0 75 14 0f 05 <48> 3d 00 f0 ff ff 77 5a c3 66 0f 1f 84 00 00 00 00 00 48 83 ec 28 RSP: 002b:00007fff07113cb8 EFLAGS: 00000246 ORIG_RAX: 
0000000000000000 RAX: ffffffffffffffda RBX: 000056360eb6a528 RCX: 00007f2abc7b204e RDX: 0000000000040000 RSI: 000056360eb6a538 RDI: 000000000000000f RBP: 000056360e8d23d0 R08: 000056360eb6a510 R09: 00007f2abc79abe0 R10: 0000000000040050 R11: 0000000000000246 R12: 000000003ff80000 R13: 0000000000040000 R14: 000056360eb6a510 R15: 000056360e8d2420 Modules linked in: Fixes: 320ae51feed5 ("blk-mq: new multi-queue block IO queueing mechanism") Fixes: d977506f8863 ("nvme-pci: make PRP list DMA pools per-NUMA-node") Acked-by: Chao Shi Acked-by: Weidong Zhu Acked-by: Dave Tian Signed-off-by: Sungwoo Kim --- block/bsg-lib.c | 2 +- drivers/block/mtip32xx/mtip32xx.c | 2 +- drivers/block/nbd.c | 2 +- drivers/md/dm-rq.c | 2 +- drivers/mmc/core/queue.c | 2 +- drivers/mtd/ubi/block.c | 2 +- drivers/nvme/host/apple.c | 2 +- drivers/nvme/host/fc.c | 2 +- drivers/nvme/host/pci.c | 11 ++++++++--- drivers/nvme/host/rdma.c | 2 +- drivers/nvme/host/tcp.c | 2 +- drivers/nvme/target/loop.c | 2 +- drivers/scsi/scsi_lib.c | 2 +- include/linux/blk-mq.h | 4 ++-- 14 files changed, 22 insertions(+), 17 deletions(-) diff --git a/block/bsg-lib.c b/block/bsg-lib.c index 9ceb5d0832f5..e93b1018a346 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -299,7 +299,7 @@ static blk_status_t bsg_queue_rq(struct blk_mq_hw_ctx *hctx, /* called right after the request is allocated for the request_queue */ static int bsg_init_rq(struct blk_mq_tag_set *set, struct request *req, - unsigned int hctx_idx, unsigned int numa_node) + unsigned int hctx_idx, int numa_node) { struct bsg_job *job = blk_mq_rq_to_pdu(req); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 567192e371a8..8aedba9b5690 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3340,7 +3340,7 @@ static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq, } static int mtip_init_cmd(struct blk_mq_tag_set *set, struct request *rq, - unsigned int hctx_idx, 
unsigned int numa_node) + unsigned int hctx_idx, int numa_node) { struct driver_data *dd = set->driver_data; struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index f6c33b21f69e..e1fac1c0c4cd 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1888,7 +1888,7 @@ static void nbd_dbg_close(void) #endif static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq, - unsigned int hctx_idx, unsigned int numa_node) + unsigned int hctx_idx, int numa_node) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); cmd->nbd = set->driver_data; diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index a6ca92049c10..b687a209256b 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -455,7 +455,7 @@ static void dm_start_request(struct mapped_device *md, struct request *orig) } static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, - unsigned int hctx_idx, unsigned int numa_node) + unsigned int hctx_idx, int numa_node) { struct mapped_device *md = set->driver_data; struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 284856c8f655..06cb29190a88 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -203,7 +203,7 @@ static unsigned short mmc_get_max_segments(struct mmc_host *host) } static int mmc_mq_init_request(struct blk_mq_tag_set *set, struct request *req, - unsigned int hctx_idx, unsigned int numa_node) + unsigned int hctx_idx, int numa_node) { struct mmc_queue_req *mq_rq = req_to_mmc_queue_req(req); struct mmc_queue *mq = set->driver_data; diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index b53fd147fa65..1c0bd2b36637 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -312,7 +312,7 @@ static blk_status_t ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx, static int ubiblock_init_request(struct blk_mq_tag_set *set, struct request *req, unsigned int hctx_idx, - unsigned 
int numa_node) + int numa_node) { struct ubiblock_pdu *pdu = blk_mq_rq_to_pdu(req); diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index ed61b97fde59..50ff5e9a168d 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -819,7 +819,7 @@ static int apple_nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, static int apple_nvme_init_request(struct blk_mq_tag_set *set, struct request *req, unsigned int hctx_idx, - unsigned int numa_node) + int numa_node) { struct apple_nvme_queue *q = set->driver_data; struct apple_nvme *anv = queue_to_apple_nvme(q); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 6948de3f438a..64d0c5d7613a 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2109,7 +2109,7 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, static int nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, - unsigned int hctx_idx, unsigned int numa_node) + unsigned int hctx_idx, int numa_node) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(set->driver_data); struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3c83076a57e5..a5f12fc7655d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -443,11 +443,16 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db, } static struct nvme_descriptor_pools * -nvme_setup_descriptor_pools(struct nvme_dev *dev, unsigned numa_node) +nvme_setup_descriptor_pools(struct nvme_dev *dev, int numa_node) { - struct nvme_descriptor_pools *pools = &dev->descriptor_pools[numa_node]; + struct nvme_descriptor_pools *pools; size_t small_align = NVME_SMALL_POOL_SIZE; + if (numa_node == NUMA_NO_NODE) + pools = &dev->descriptor_pools[numa_node_id()]; + else + pools = &dev->descriptor_pools[numa_node]; + if (pools->small) return pools; /* already initialized */ @@ -516,7 +521,7 @@ static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, static int 
nvme_pci_init_request(struct blk_mq_tag_set *set, struct request *req, unsigned int hctx_idx, - unsigned int numa_node) + int numa_node) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 35c0822edb2d..c2514ef94028 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -292,7 +292,7 @@ static void nvme_rdma_exit_request(struct blk_mq_tag_set *set, static int nvme_rdma_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, - unsigned int numa_node) + int numa_node) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data); struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 69cb04406b47..385eef98081b 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -547,7 +547,7 @@ static void nvme_tcp_exit_request(struct blk_mq_tag_set *set, static int nvme_tcp_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, - unsigned int numa_node) + int numa_node) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data); struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index fc8e7c9ad858..72a8ea70eae7 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -202,7 +202,7 @@ static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, static int nvme_loop_init_request(struct blk_mq_tag_set *set, struct request *req, unsigned int hctx_idx, - unsigned int numa_node) + int numa_node) { struct nvme_loop_ctrl *ctrl = to_loop_ctrl(set->driver_data); struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 4a902c9dfd8b..8958ad31ed2a 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1948,7 +1948,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, } static int 
scsi_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, - unsigned int hctx_idx, unsigned int numa_node) + unsigned int hctx_idx, int numa_node) { struct Scsi_Host *shost = set->driver_data; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index cae9e857aea4..1a5a3786522c 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -426,7 +426,7 @@ struct blk_mq_hw_ctx { struct blk_mq_tags *sched_tags; /** @numa_node: NUMA node the storage adapter has been connected to. */ - unsigned int numa_node; + int numa_node; /** @queue_num: Index of this hardware queue. */ unsigned int queue_num; @@ -651,7 +651,7 @@ struct blk_mq_ops { * flush request. */ int (*init_request)(struct blk_mq_tag_set *set, struct request *, - unsigned int, unsigned int); + unsigned int, int); /** * @exit_request: Ditto for exit/teardown. */ -- 2.47.3