numa_node can be negative because NUMA_NO_NODE is defined as -1. However,
struct blk_mq_hw_ctx declares numa_node as unsigned int. As a result,
NUMA_NO_NODE is stored as UINT_MAX when blk_mq_alloc_hctx() sets numa_node.
Later, nvme_setup_descriptor_pools() indexes descriptor_pools[numa_node],
which in this case means descriptor_pools[UINT_MAX]. The resulting
address is garbage, but it is canonical and still falls within the slab
memory range, so the access does not trigger a page fault. KASAN does
not detect it either, because the address lies beyond the redzones of
the descriptor_pools allocation.
Subsequently, normal I/O calls dma_pool_alloc() with the garbage pool
address. pool->next_block contains a wild pointer, causing a general
protection fault (GPF).
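To fix this, change the type of numa_node to int, both in struct
blk_mq_hw_ctx and in the ->init_request() callback and its
implementations, and make nvme_setup_descriptor_pools() fall back to the
pools of the current node (numa_node_id()) when it is passed NUMA_NO_NODE.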
Log:
Oops: general protection fault, probably for non-canonical address 0xe9803b040854d02c: 0000 [#1] SMP KASAN PTI
KASAN: maybe wild-memory-access in range [0x4c01f82042a68160-0x4c01f82042a68167][FEMU] Err: I/O cmd failed: opcode=0x2 status=0x4002
CPU: 0 UID: 0 PID: 112363 Comm: systemd-udevd Not tainted 6.19.0-dirty #10 PREEMPT(voluntary)
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
RIP: 0010:pool_block_pop mm/dmapool.c:187 [inline]
RIP: 0010:dma_pool_alloc+0x110/0x990 mm/dmapool.c:417
Code: 00 0f 85 a4 07 00 00 4c 8b 63 58 4d 85 e4 0f 84 12 01 00 00 e8 41 1d 93 ff 4c 89 e2 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 a7 07 00 00 49 8b 04 24 48 8d 7b 68 48 89 fa 48
RSP: 0018:ffffc90002b9efd0 EFLAGS: 00010003
RAX: dffffc0000000000 RBX: ffff888005466800 RCX: ffffffff94faab7f
RDX: 09803f040854d02c RSI: 6c9b26c9b26c9b27 RDI: ffff88800c725ea0
RBP: ffffc90002b9f060 R08: 0000000000000001 R09: 0000000000000001
R10: 0000000000000003 R11: 0000000000000000 R12: 4c01f82042a68164
R13: ffff888005466800 R14: 0000000000000820 R15: ffff888007b29000
FS: 00007f2abc4ff8c0(0000) GS:ffff8880d1ff7000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000056360eb89000 CR3: 000000000a480000 CR4: 00000000000006f0
Call Trace:
nvme_pci_setup_data_prp drivers/nvme/host/pci.c:906 [inline]
nvme_map_data drivers/nvme/host/pci.c:1114 [inline]
nvme_prep_rq.part.0+0x17d3/0x3c90 drivers/nvme/host/pci.c:1243
nvme_prep_rq drivers/nvme/host/pci.c:1239 [inline]
nvme_prep_rq_batch drivers/nvme/host/pci.c:1321 [inline]
nvme_queue_rqs+0x37b/0x8a0 drivers/nvme/host/pci.c:1336
__blk_mq_flush_list block/blk-mq.c:2848 [inline]
__blk_mq_flush_list+0xaa/0xe0 block/blk-mq.c:2844
blk_mq_dispatch_queue_requests+0x4f5/0x990 block/blk-mq.c:2893
blk_mq_flush_plug_list+0x232/0x650 block/blk-mq.c:2981
__blk_flush_plug+0x2c3/0x510 block/blk-core.c:1225
blk_finish_plug block/blk-core.c:1252 [inline]
blk_finish_plug+0x64/0xc0 block/blk-core.c:1249
read_pages+0x6bd/0x9d0 mm/readahead.c:176
page_cache_ra_unbounded+0x659/0x950 mm/readahead.c:269
do_page_cache_ra mm/readahead.c:332 [inline]
force_page_cache_ra+0x282/0x3a0 mm/readahead.c:361
page_cache_sync_ra+0x201/0xbf0 mm/readahead.c:579
filemap_get_pages+0x3be/0x1990 mm/filemap.c:2690
filemap_read+0x3ea/0xdf0 mm/filemap.c:2800
blkdev_read_iter+0x1b8/0x520 block/fops.c:856
new_sync_read fs/read_write.c:491 [inline]
vfs_read+0x90f/0xd80 fs/read_write.c:572
ksys_read+0x14e/0x280 fs/read_write.c:715
__do_sys_read fs/read_write.c:724 [inline]
__se_sys_read fs/read_write.c:722 [inline]
__x64_sys_read+0x7b/0xc0 fs/read_write.c:722
x64_sys_call+0x17ec/0x21b0 arch/x86/include/generated/asm/syscalls_64.h:1
do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
do_syscall_64+0x8b/0x1200 arch/x86/entry/syscall_64.c:94
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f2abc7b204e
Code: 0f 1f 40 00 48 8b 15 79 af 00 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb ba 0f 1f 00 64 8b 04 25 18 00 00 00 85 c0 75 14 0f 05 <48> 3d 00 f0 ff ff 77 5a c3 66 0f 1f 84 00 00 00 00 00 48 83 ec 28
RSP: 002b:00007fff07113cb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
RAX: ffffffffffffffda RBX: 000056360eb6a528 RCX: 00007f2abc7b204e
RDX: 0000000000040000 RSI: 000056360eb6a538 RDI: 000000000000000f
RBP: 000056360e8d23d0 R08: 000056360eb6a510 R09: 00007f2abc79abe0
R10: 0000000000040050 R11: 0000000000000246 R12: 000000003ff80000
R13: 0000000000040000 R14: 000056360eb6a510 R15: 000056360e8d2420
Modules linked in:
Fixes: 320ae51feed5 ("blk-mq: new multi-queue block IO queueing mechanism")
Fixes: d977506f8863 ("nvme-pci: make PRP list DMA pools per-NUMA-node")
Acked-by: Chao Shi
Acked-by: Weidong Zhu
Acked-by: Dave Tian
Signed-off-by: Sungwoo Kim
---
block/bsg-lib.c | 2 +-
drivers/block/mtip32xx/mtip32xx.c | 2 +-
drivers/block/nbd.c | 2 +-
drivers/md/dm-rq.c | 2 +-
drivers/mmc/core/queue.c | 2 +-
drivers/mtd/ubi/block.c | 2 +-
drivers/nvme/host/apple.c | 2 +-
drivers/nvme/host/fc.c | 2 +-
drivers/nvme/host/pci.c | 11 ++++++++---
drivers/nvme/host/rdma.c | 2 +-
drivers/nvme/host/tcp.c | 2 +-
drivers/nvme/target/loop.c | 2 +-
drivers/scsi/scsi_lib.c | 2 +-
include/linux/blk-mq.h | 4 ++--
14 files changed, 22 insertions(+), 17 deletions(-)
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 9ceb5d0832f5..e93b1018a346 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -299,7 +299,7 @@ static blk_status_t bsg_queue_rq(struct blk_mq_hw_ctx *hctx,
/* called right after the request is allocated for the request_queue */
static int bsg_init_rq(struct blk_mq_tag_set *set, struct request *req,
- unsigned int hctx_idx, unsigned int numa_node)
+ unsigned int hctx_idx, int numa_node)
{
struct bsg_job *job = blk_mq_rq_to_pdu(req);
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 567192e371a8..8aedba9b5690 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3340,7 +3340,7 @@ static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq,
}
static int mtip_init_cmd(struct blk_mq_tag_set *set, struct request *rq,
- unsigned int hctx_idx, unsigned int numa_node)
+ unsigned int hctx_idx, int numa_node)
{
struct driver_data *dd = set->driver_data;
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index f6c33b21f69e..e1fac1c0c4cd 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1888,7 +1888,7 @@ static void nbd_dbg_close(void)
#endif
static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
- unsigned int hctx_idx, unsigned int numa_node)
+ unsigned int hctx_idx, int numa_node)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
cmd->nbd = set->driver_data;
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index a6ca92049c10..b687a209256b 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -455,7 +455,7 @@ static void dm_start_request(struct mapped_device *md, struct request *orig)
}
static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
- unsigned int hctx_idx, unsigned int numa_node)
+ unsigned int hctx_idx, int numa_node)
{
struct mapped_device *md = set->driver_data;
struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 284856c8f655..06cb29190a88 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -203,7 +203,7 @@ static unsigned short mmc_get_max_segments(struct mmc_host *host)
}
static int mmc_mq_init_request(struct blk_mq_tag_set *set, struct request *req,
- unsigned int hctx_idx, unsigned int numa_node)
+ unsigned int hctx_idx, int numa_node)
{
struct mmc_queue_req *mq_rq = req_to_mmc_queue_req(req);
struct mmc_queue *mq = set->driver_data;
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index b53fd147fa65..1c0bd2b36637 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -312,7 +312,7 @@ static blk_status_t ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
static int ubiblock_init_request(struct blk_mq_tag_set *set,
struct request *req, unsigned int hctx_idx,
- unsigned int numa_node)
+ int numa_node)
{
struct ubiblock_pdu *pdu = blk_mq_rq_to_pdu(req);
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index ed61b97fde59..50ff5e9a168d 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -819,7 +819,7 @@ static int apple_nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
static int apple_nvme_init_request(struct blk_mq_tag_set *set,
struct request *req, unsigned int hctx_idx,
- unsigned int numa_node)
+ int numa_node)
{
struct apple_nvme_queue *q = set->driver_data;
struct apple_nvme *anv = queue_to_apple_nvme(q);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 6948de3f438a..64d0c5d7613a 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2109,7 +2109,7 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
static int
nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
- unsigned int hctx_idx, unsigned int numa_node)
+ unsigned int hctx_idx, int numa_node)
{
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(set->driver_data);
struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3c83076a57e5..a5f12fc7655d 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -443,11 +443,16 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db,
}
static struct nvme_descriptor_pools *
-nvme_setup_descriptor_pools(struct nvme_dev *dev, unsigned numa_node)
+nvme_setup_descriptor_pools(struct nvme_dev *dev, int numa_node)
{
- struct nvme_descriptor_pools *pools = &dev->descriptor_pools[numa_node];
+ struct nvme_descriptor_pools *pools;
size_t small_align = NVME_SMALL_POOL_SIZE;
+ if (numa_node == NUMA_NO_NODE)
+ pools = &dev->descriptor_pools[numa_node_id()];
+ else
+ pools = &dev->descriptor_pools[numa_node];
+
if (pools->small)
return pools; /* already initialized */
@@ -516,7 +521,7 @@ static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
static int nvme_pci_init_request(struct blk_mq_tag_set *set,
struct request *req, unsigned int hctx_idx,
- unsigned int numa_node)
+ int numa_node)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 35c0822edb2d..c2514ef94028 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -292,7 +292,7 @@ static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
struct request *rq, unsigned int hctx_idx,
- unsigned int numa_node)
+ int numa_node)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data);
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 69cb04406b47..385eef98081b 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -547,7 +547,7 @@ static void nvme_tcp_exit_request(struct blk_mq_tag_set *set,
static int nvme_tcp_init_request(struct blk_mq_tag_set *set,
struct request *rq, unsigned int hctx_idx,
- unsigned int numa_node)
+ int numa_node)
{
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data);
struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index fc8e7c9ad858..72a8ea70eae7 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -202,7 +202,7 @@ static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl,
static int nvme_loop_init_request(struct blk_mq_tag_set *set,
struct request *req, unsigned int hctx_idx,
- unsigned int numa_node)
+ int numa_node)
{
struct nvme_loop_ctrl *ctrl = to_loop_ctrl(set->driver_data);
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 4a902c9dfd8b..8958ad31ed2a 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1948,7 +1948,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
}
static int scsi_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
- unsigned int hctx_idx, unsigned int numa_node)
+ unsigned int hctx_idx, int numa_node)
{
struct Scsi_Host *shost = set->driver_data;
struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index cae9e857aea4..1a5a3786522c 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -426,7 +426,7 @@ struct blk_mq_hw_ctx {
struct blk_mq_tags *sched_tags;
/** @numa_node: NUMA node the storage adapter has been connected to. */
- unsigned int numa_node;
+ int numa_node;
/** @queue_num: Index of this hardware queue. */
unsigned int queue_num;
@@ -651,7 +651,7 @@ struct blk_mq_ops {
* flush request.
*/
int (*init_request)(struct blk_mq_tag_set *set, struct request *,
- unsigned int, unsigned int);
+ unsigned int, int);
/**
* @exit_request: Ditto for exit/teardown.
*/
--
2.47.3