In RNBD client, for a WRITE request of size 0, with only the REQ_PREFLUSH bit set, while converting from bio_opf to rnbd_opf, we do REQ_OP_WRITE to RNBD_OP_WRITE, and then check if the rq is flush through function op_is_flush. That function checks both REQ_PREFLUSH and REQ_FUA flag, and if any of them is set, the RNBD_F_FUA is set. On the RNBD server side, while converting the RNBD flags to req flags, if the RNBD_F_FUA flag is set, we just set the REQ_FUA flag. This means we have lost the PREFLUSH flag, and added the REQ_FUA flag in its place. This commits adds a new RNBD_F_PREFLUSH flag, and also adds separate handling for REQ_PREFLUSH flag. On the server side, if the RNBD_F_PREFLUSH is present, the REQ_PREFLUSH is added to the bio. Since it is a change in the wire protocol, bump the minor version of protocol. The change is backwards compatible, and does not change the functionality if either the client or the server is running older/newer versions. If the client side is running the older version, both REQ_PREFLUSH and REQ_FUA is converted to RNBD_F_FUA. The server running newer one would still add only the REQ_FUA flag which is what happens when both client and server is running the older version. If the client side is running the newer version, just like before a RNBD_F_FUA is added, but now a RNBD_F_PREFLUSH is also added to the rnbd_opf. In case the server is running the older version the RNBD_F_PREFLUSH is ignored, and only the RNBD_F_FUA is processed. Signed-off-by: Md Haris Iqbal Reviewed-by: Jack Wang Reviewed-by: Florian-Ewald Mueller Signed-off-by: Grzegorz Prajsner --- drivers/block/rnbd/rnbd-proto.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h index 77360c2a6069..5e74ae86169b 100644 --- a/drivers/block/rnbd/rnbd-proto.h +++ b/drivers/block/rnbd/rnbd-proto.h @@ -18,7 +18,7 @@ #include #define RNBD_PROTO_VER_MAJOR 2 -#define RNBD_PROTO_VER_MINOR 0 +#define RNBD_PROTO_VER_MINOR 1 /* The default port number the RTRS server is listening on. */ #define RTRS_PORT 1234 @@ -197,6 +197,7 @@ struct rnbd_msg_io { * * @RNBD_F_SYNC: request is sync (sync write or read) * @RNBD_F_FUA: forced unit access + * @RNBD_F_PREFLUSH: request for cache flush */ enum rnbd_io_flags { @@ -211,6 +212,7 @@ enum rnbd_io_flags { /* Flags */ RNBD_F_SYNC = 1<<(RNBD_OP_BITS + 0), RNBD_F_FUA = 1<<(RNBD_OP_BITS + 1), + RNBD_F_PREFLUSH = 1<<(RNBD_OP_BITS + 2) }; static inline u32 rnbd_op(u32 flags) @@ -258,6 +260,9 @@ static inline blk_opf_t rnbd_to_bio_flags(u32 rnbd_opf) if (rnbd_opf & RNBD_F_FUA) bio_opf |= REQ_FUA; + if (rnbd_opf & RNBD_F_PREFLUSH) + bio_opf |= REQ_PREFLUSH; + return bio_opf; } @@ -297,6 +302,9 @@ static inline u32 rq_to_rnbd_flags(struct request *rq) if (op_is_flush(rq->cmd_flags)) rnbd_opf |= RNBD_F_FUA; + if (rq->cmd_flags & REQ_PREFLUSH) + rnbd_opf |= RNBD_F_PREFLUSH; + return rnbd_opf; } -- 2.43.0 From: Zhu Yanjun Every ktype must provides a .release function that will be called after the last kobject_put. Signed-off-by: Zhu Yanjun Reviewed-by: Md Haris Iqbal Signed-off-by: Grzegorz Prajsner --- drivers/block/rnbd/rnbd-clt-sysfs.c | 8 ++++++++ drivers/block/rnbd/rnbd-clt.c | 18 ++++++++++-------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c index 6ea7c12e3a87..144aea1466a4 100644 --- a/drivers/block/rnbd/rnbd-clt-sysfs.c +++ b/drivers/block/rnbd/rnbd-clt-sysfs.c @@ -475,9 +475,17 @@ void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev) } } +static void rnbd_dev_release(struct kobject *kobj) +{ + struct rnbd_clt_dev *dev = container_of(kobj, struct rnbd_clt_dev, kobj); + + kfree(dev); +} + static const struct kobj_type rnbd_dev_ktype = { .sysfs_ops = &kobj_sysfs_ops, .default_groups = rnbd_dev_groups, + .release = rnbd_dev_release, }; static int rnbd_clt_add_dev_kobj(struct rnbd_clt_dev *dev) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index f1409e54010a..085fe8dd1179 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -60,7 +60,9 @@ static void rnbd_clt_put_dev(struct rnbd_clt_dev *dev) kfree(dev->pathname); rnbd_clt_put_sess(dev->sess); mutex_destroy(&dev->lock); - kfree(dev); + + if (dev->kobj.state_initialized) + kobject_put(&dev->kobj); } static inline bool rnbd_clt_get_dev(struct rnbd_clt_dev *dev) @@ -1514,7 +1516,7 @@ static bool insert_dev_if_not_exists_devpath(struct rnbd_clt_dev *dev) return found; } -static void delete_dev(struct rnbd_clt_dev *dev) +static void rnbd_delete_dev(struct rnbd_clt_dev *dev) { struct rnbd_clt_session *sess = dev->sess; @@ -1635,7 +1637,7 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, kfree(rsp); rnbd_put_iu(sess, iu); del_dev: - delete_dev(dev); + rnbd_delete_dev(dev); put_dev: rnbd_clt_put_dev(dev); put_sess: @@ -1644,13 +1646,13 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, return ERR_PTR(ret); } -static void destroy_gen_disk(struct rnbd_clt_dev *dev) +static void rnbd_destroy_gen_disk(struct rnbd_clt_dev *dev) { del_gendisk(dev->gd); put_disk(dev->gd); } -static void destroy_sysfs(struct rnbd_clt_dev *dev, +static void rnbd_destroy_sysfs(struct rnbd_clt_dev *dev, const struct attribute *sysfs_self) { rnbd_clt_remove_dev_symlink(dev); @@ -1688,9 +1690,9 @@ int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force, dev->dev_state = DEV_STATE_UNMAPPED; mutex_unlock(&dev->lock); - delete_dev(dev); - destroy_sysfs(dev, sysfs_self); - destroy_gen_disk(dev); + rnbd_delete_dev(dev); + rnbd_destroy_sysfs(dev, sysfs_self); + rnbd_destroy_gen_disk(dev); if (was_mapped && sess->rtrs) send_msg_close(dev, dev->device_id, RTRS_PERMIT_WAIT); -- 2.43.0 The NOUNMAP flag is in combination with WRITE_ZEROES flag to indicate that the upper layers wants the sectors zeroed, but does not want it to get freed. This instruction is especially important for storage stacks which involves a layer capable of thin provisioning. This commit makes RNBD block device transfer and retain this NOUNMAP flag for requests, so it can be passed onto the backend device on the server side. Since it is a change in the wire protocol, bump the minor version of protocol. Signed-off-by: Md Haris Iqbal Signed-off-by: Jack Wang Signed-off-by: Grzegorz Prajsner --- drivers/block/rnbd/rnbd-proto.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h index 5e74ae86169b..64f1cfe9f8ef 100644 --- a/drivers/block/rnbd/rnbd-proto.h +++ b/drivers/block/rnbd/rnbd-proto.h @@ -18,7 +18,7 @@ #include #define RNBD_PROTO_VER_MAJOR 2 -#define RNBD_PROTO_VER_MINOR 1 +#define RNBD_PROTO_VER_MINOR 2 /* The default port number the RTRS server is listening on. */ #define RTRS_PORT 1234 @@ -198,6 +198,7 @@ struct rnbd_msg_io { * @RNBD_F_SYNC: request is sync (sync write or read) * @RNBD_F_FUA: forced unit access * @RNBD_F_PREFLUSH: request for cache flush + * @RNBD_F_NOUNMAP: do not free blocks when zeroing */ enum rnbd_io_flags { @@ -212,7 +213,8 @@ enum rnbd_io_flags { /* Flags */ RNBD_F_SYNC = 1<<(RNBD_OP_BITS + 0), RNBD_F_FUA = 1<<(RNBD_OP_BITS + 1), - RNBD_F_PREFLUSH = 1<<(RNBD_OP_BITS + 2) + RNBD_F_PREFLUSH = 1<<(RNBD_OP_BITS + 2), + RNBD_F_NOUNMAP = 1<<(RNBD_OP_BITS + 3) }; static inline u32 rnbd_op(u32 flags) @@ -247,6 +249,9 @@ static inline blk_opf_t rnbd_to_bio_flags(u32 rnbd_opf) break; case RNBD_OP_WRITE_ZEROES: bio_opf = REQ_OP_WRITE_ZEROES; + + if (rnbd_opf & RNBD_F_NOUNMAP) + bio_opf |= REQ_NOUNMAP; break; default: WARN(1, "Unknown RNBD type: %d (flags %d)\n", @@ -285,6 +290,9 @@ static inline u32 rq_to_rnbd_flags(struct request *rq) break; case REQ_OP_WRITE_ZEROES: rnbd_opf = RNBD_OP_WRITE_ZEROES; + + if (rq->cmd_flags & REQ_NOUNMAP) + rnbd_opf |= RNBD_F_NOUNMAP; break; case REQ_OP_FLUSH: rnbd_opf = RNBD_OP_FLUSH; -- 2.43.0 From: Jack Wang The __print_flags helper meant for bitmask, while the rnbd_rw_flags is mixed with bitmask and enum, to avoid confusion, just print the data as it is. Signed-off-by: Jack Wang Reviewed-by: Md Haris Iqbal Signed-off-by: Grzegorz Prajsner --- drivers/block/rnbd/rnbd-srv-trace.h | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/block/rnbd/rnbd-srv-trace.h b/drivers/block/rnbd/rnbd-srv-trace.h index 89d0bcb17195..18ae2ed5537a 100644 --- a/drivers/block/rnbd/rnbd-srv-trace.h +++ b/drivers/block/rnbd/rnbd-srv-trace.h @@ -44,24 +44,6 @@ DEFINE_EVENT(rnbd_srv_link_class, name, \ DEFINE_LINK_EVENT(create_sess); DEFINE_LINK_EVENT(destroy_sess); -TRACE_DEFINE_ENUM(RNBD_OP_READ); -TRACE_DEFINE_ENUM(RNBD_OP_WRITE); -TRACE_DEFINE_ENUM(RNBD_OP_FLUSH); -TRACE_DEFINE_ENUM(RNBD_OP_DISCARD); -TRACE_DEFINE_ENUM(RNBD_OP_SECURE_ERASE); -TRACE_DEFINE_ENUM(RNBD_F_SYNC); -TRACE_DEFINE_ENUM(RNBD_F_FUA); - -#define show_rnbd_rw_flags(x) \ - __print_flags(x, "|", \ - { RNBD_OP_READ, "READ" }, \ - { RNBD_OP_WRITE, "WRITE" }, \ - { RNBD_OP_FLUSH, "FLUSH" }, \ - { RNBD_OP_DISCARD, "DISCARD" }, \ - { RNBD_OP_SECURE_ERASE, "SECURE_ERASE" }, \ - { RNBD_F_SYNC, "SYNC" }, \ - { RNBD_F_FUA, "FUA" }) - TRACE_EVENT(process_rdma, TP_PROTO(struct rnbd_srv_session *srv, const struct rnbd_msg_io *msg, @@ -97,7 +79,7 @@ TRACE_EVENT(process_rdma, __entry->usrlen = usrlen; ), - TP_printk("I/O req: sess: %s, type: %s, ver: %d, devid: %u, sector: %llu, bsize: %u, flags: %s, ioprio: %d, datalen: %u, usrlen: %zu", + TP_printk("I/O req: sess: %s, type: %s, ver: %d, devid: %u, sector: %llu, bsize: %u, flags: %u, ioprio: %d, datalen: %u, usrlen: %zu", __get_str(sessname), __print_symbolic(__entry->dir, { READ, "READ" }, @@ -106,7 +88,7 @@ TRACE_EVENT(process_rdma, __entry->device_id, __entry->sector, __entry->bi_size, - show_rnbd_rw_flags(__entry->flags), + __entry->flags, __entry->ioprio, __entry->datalen, __entry->usrlen -- 2.43.0 From: Florian-Ewald Mueller On rnbd-srv, the bi_size of the bio is set during the bio_add_page function, to which datalen is passed. But for special IOs like DISCARD and WRITE_ZEROES, datalen is 0, since there is no data to write. For these special IOs, use the bi_size of the rnbd_msg_io. Fixes: f6f84be089c9 ("block/rnbd-srv: Add sanity check and remove redundant assignment") Signed-off-by: Florian-Ewald Mueller Signed-off-by: Md Haris Iqbal Signed-off-by: Grzegorz Prajsner --- drivers/block/rnbd/rnbd-srv.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index 2df8941a6b14..9b3fdc202e15 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -145,18 +145,30 @@ static int process_rdma(struct rnbd_srv_session *srv_sess, priv->sess_dev = sess_dev; priv->id = id; - bio = bio_alloc(file_bdev(sess_dev->bdev_file), 1, + bio = bio_alloc(file_bdev(sess_dev->bdev_file), !!datalen, rnbd_to_bio_flags(le32_to_cpu(msg->rw)), GFP_KERNEL); - bio_add_virt_nofail(bio, data, datalen); - - bio->bi_opf = rnbd_to_bio_flags(le32_to_cpu(msg->rw)); - if (bio_has_data(bio) && - bio->bi_iter.bi_size != le32_to_cpu(msg->bi_size)) { - rnbd_srv_err_rl(sess_dev, "Datalen mismatch: bio bi_size (%u), bi_size (%u)\n", - bio->bi_iter.bi_size, msg->bi_size); - err = -EINVAL; - goto bio_put; + if (unlikely(!bio)) { + err = -ENOMEM; + goto put_sess_dev; } + + if (!datalen) { + /* + * For special requests like DISCARD and WRITE_ZEROES, the datalen is zero. + */ + bio->bi_iter.bi_size = le32_to_cpu(msg->bi_size); + } else { + bio_add_virt_nofail(bio, data, datalen); + bio->bi_opf = rnbd_to_bio_flags(le32_to_cpu(msg->rw)); + if (bio->bi_iter.bi_size != le32_to_cpu(msg->bi_size)) { + rnbd_srv_err_rl(sess_dev, + "Datalen mismatch: bio bi_size (%u), bi_size (%u)\n", + bio->bi_iter.bi_size, msg->bi_size); + err = -EINVAL; + goto bio_put; + } + } + bio->bi_end_io = rnbd_dev_bi_end_io; bio->bi_private = priv; bio->bi_iter.bi_sector = le64_to_cpu(msg->sector); @@ -170,6 +182,7 @@ static int process_rdma(struct rnbd_srv_session *srv_sess, bio_put: bio_put(bio); +put_sess_dev: rnbd_put_sess_dev(sess_dev); err: kfree(priv); -- 2.43.0 Before using the data buffer to send back the response message, zero it completely. This prevents any stray bytes to be picked up by the client side when there the message is exchanged between different protocol versions. Signed-off-by: Md Haris Iqbal Signed-off-by: Jack Wang Signed-off-by: Grzegorz Prajsner --- drivers/block/rnbd/rnbd-srv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index 9b3fdc202e15..7eeb321d6140 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -551,6 +551,8 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp, { struct block_device *bdev = file_bdev(sess_dev->bdev_file); + memset(rsp, 0, sizeof(*rsp)); + rsp->hdr.type = cpu_to_le16(RNBD_MSG_OPEN_RSP); rsp->device_id = cpu_to_le32(sess_dev->device_id); rsp->nsectors = cpu_to_le64(bdev_nr_sectors(bdev)); @@ -657,6 +659,7 @@ static void process_msg_sess_info(struct rnbd_srv_session *srv_sess, trace_process_msg_sess_info(srv_sess, sess_info_msg); + memset(rsp, 0, sizeof(*rsp)); rsp->hdr.type = cpu_to_le16(RNBD_MSG_SESS_INFO_RSP); rsp->ver = srv_sess->ver; } -- 2.43.0