Introduce the function blkdev_get_zone_info() to obtain the information of a single zone from cached zone data, that is, either from the zone write plug for the target zone if it exists, or from the disk zones_cond array otherwise. Since sequential zones that do not have a zone write plug are either full, empty or in a bad state (read-only or offline), the write pointer of such a zone can be inferred from the zone condition cached in the disk zones_cond array. For sequential zones that have a zone write plug, the zone condition and zone write pointer are obtained from the condition and write pointer offset managed with the zone write plug. This allows obtaining the information for a zone much more quickly than having to execute a report zones command on the device. blkdev_get_zone_info() falls back to using a regular zone report if the target zone is flagged as needing an update with the BLK_ZONE_WPLUG_NEED_WP_UPDATE flag, or if the target device does not use zone write plugs (i.e. a device mapper device). In this case, the new function blkdev_report_zone_fallback() is used and the zone condition is reported consistently with the cached report, that is, the BLK_ZONE_COND_ACTIVE condition is used in place of the implicit open, explicit open and closed conditions. This is achieved by adding the .report_active field to struct blk_report_zones_args and by having disk_report_zone() set the correct zone condition if .report_active is true. In preparation for using blkdev_get_zone_info() in upcoming file system changes, also export this function as a GPL symbol. 
Signed-off-by: Damien Le Moal --- block/blk-zoned.c | 165 +++++++++++++++++++++++++++++++++++++++-- include/linux/blkdev.h | 3 + 2 files changed, 160 insertions(+), 8 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 00cfd9431c3e..03394e38645f 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -202,8 +202,24 @@ EXPORT_SYMBOL_GPL(bdev_zone_is_seq); struct blk_report_zones_args { report_zones_cb cb; void *data; + bool report_active; }; +static int blkdev_do_report_zones(struct block_device *bdev, sector_t sector, + unsigned int nr_zones, + struct blk_report_zones_args *args) +{ + struct gendisk *disk = bdev->bd_disk; + + if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones)) + return -EOPNOTSUPP; + + if (!nr_zones || sector >= get_capacity(disk)) + return 0; + + return disk->fops->report_zones(disk, sector, nr_zones, args); +} + /** * blkdev_report_zones - Get zones information * @bdev: Target block device @@ -226,19 +242,12 @@ struct blk_report_zones_args { int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data) { - struct gendisk *disk = bdev->bd_disk; struct blk_report_zones_args args = { .cb = cb, .data = data, }; - if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones)) - return -EOPNOTSUPP; - - if (!nr_zones || sector >= get_capacity(disk)) - return 0; - - return disk->fops->report_zones(disk, sector, nr_zones, &args); + return blkdev_do_report_zones(bdev, sector, nr_zones, &args); } EXPORT_SYMBOL_GPL(blkdev_report_zones); @@ -803,6 +812,23 @@ static void disk_zone_wplug_sync_wp_offset(struct gendisk *disk, int disk_report_zone(struct gendisk *disk, struct blk_zone *zone, unsigned int idx, struct blk_report_zones_args *args) { + if (args->report_active) { + /* + * If we come here, then this is a report zones as a fallback + * for a cached report. 
So collapse the implicit open, explicit + * open and closed conditions into the active zone condition. + */ + switch (zone->cond) { + case BLK_ZONE_COND_IMP_OPEN: + case BLK_ZONE_COND_EXP_OPEN: + case BLK_ZONE_COND_CLOSED: + zone->cond = BLK_ZONE_COND_ACTIVE; + break; + default: + break; + } + } + if (disk->zone_wplugs_hash) disk_zone_wplug_sync_wp_offset(disk, zone); @@ -813,6 +839,129 @@ int disk_report_zone(struct gendisk *disk, struct blk_zone *zone, } EXPORT_SYMBOL_GPL(disk_report_zone); +static int blkdev_report_zone_cb(struct blk_zone *zone, unsigned int idx, + void *data) +{ + memcpy(data, zone, sizeof(struct blk_zone)); + return 0; +} + +static int blkdev_report_zone_fallback(struct block_device *bdev, + sector_t sector, struct blk_zone *zone) +{ + struct blk_report_zones_args args = { + .cb = blkdev_report_zone_cb, + .data = zone, + .report_active = true, + }; + + return blkdev_do_report_zones(bdev, sector, 1, &args); +} + +/** + * blkdev_get_zone_info - Get a zone information from cached data + * @bdev: Target block device + * @sector: Sector contained by the target zone + * @zone: zone structure to return the zone information + * + * Description: + * Get the zone information for the zone containing @sector using the zone + * write plug of the target zone, if one exist, or the disk zone condition + * array otherwise. The zone condition may be reported as being + * the BLK_ZONE_COND_ACTIVE condition for a zone that is in the implicit + * open, explicit open or closed condition. + * + * Returns 0 on success and a negative error code on failure. 
+ */ +int blkdev_get_zone_info(struct block_device *bdev, sector_t sector, + struct blk_zone *zone) +{ + struct gendisk *disk = bdev->bd_disk; + sector_t zone_sectors = bdev_zone_sectors(bdev); + struct blk_zone_wplug *zwplug; + unsigned long flags; + u8 *zones_cond; + + if (!bdev_is_zoned(bdev)) + return -EOPNOTSUPP; + + if (sector >= get_capacity(disk)) + return -EINVAL; + + memset(zone, 0, sizeof(*zone)); + sector = sector & (~(zone_sectors - 1)); + + rcu_read_lock(); + zones_cond = rcu_dereference(disk->zones_cond); + if (!disk->zone_wplugs_hash || !zones_cond) { + rcu_read_unlock(); + return blkdev_report_zone_fallback(bdev, sector, zone); + } + zone->cond = zones_cond[disk_zone_no(disk, sector)]; + rcu_read_unlock(); + + zone->start = sector; + zone->len = zone_sectors; + + /* + * If this is a conventional zone, we do not have a zone write plug and + * can report the zone immediately. + */ + if (zone->cond == BLK_ZONE_COND_NOT_WP) { + zone->type = BLK_ZONE_TYPE_CONVENTIONAL; + zone->capacity = zone_sectors; + zone->wp = ULLONG_MAX; + return 0; + } + + /* + * This is a sequential write required zone. If the zone is read-only or + * offline, only set the zone write pointer to an invalid value and + * report the zone. + */ + zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ; + if (disk_zone_is_last(disk, zone)) + zone->capacity = disk->last_zone_capacity; + else + zone->capacity = disk->zone_capacity; + + if (zone->cond == BLK_ZONE_COND_READONLY || + zone->cond == BLK_ZONE_COND_OFFLINE) { + zone->wp = ULLONG_MAX; + return 0; + } + + /* + * If the zone does not have a zone write plug, it is either full or + * empty, as we otherwise would have a zone write plug for it. In this + * case, set the write pointer accordingly and report the zone. + * Otherwise, if we have a zone write plug, use it. 
+ */ + zwplug = disk_get_zone_wplug(disk, sector); + if (!zwplug) { + if (zone->cond == BLK_ZONE_COND_FULL) + zone->wp = sector + zone_sectors; + else + zone->wp = sector; + return 0; + } + + spin_lock_irqsave(&zwplug->lock, flags); + if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE) { + spin_unlock_irqrestore(&zwplug->lock, flags); + disk_put_zone_wplug(zwplug); + return blkdev_report_zone_fallback(bdev, sector, zone); + } + zone->cond = zwplug->cond; + zone->wp = sector + zwplug->wp_offset; + spin_unlock_irqrestore(&zwplug->lock, flags); + + disk_put_zone_wplug(zwplug); + + return 0; +} +EXPORT_SYMBOL_GPL(blkdev_get_zone_info); + static void blk_zone_reset_bio_endio(struct bio *bio) { struct gendisk *disk = bio->bi_bdev->bd_disk; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 15cc13006d06..98a0ed989d21 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -436,6 +436,9 @@ typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, int disk_report_zone(struct gendisk *disk, struct blk_zone *zone, unsigned int idx, struct blk_report_zones_args *args); +int blkdev_get_zone_info(struct block_device *bdev, sector_t sector, + struct blk_zone *zone); + #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); -- 2.51.0