From: Keith Busch

A bio segment might have partial block data with the rest continuing
into the next segments. At the same time, the protection information
may also be split across multiple segments. The most likely way that
may happen is if two requests merge, or if we're directly using the
io_uring user metadata. Further, the protection fields may be unaligned
in the user space buffer, or there may be odd additional opaque bytes
in front of or behind the protection information in the metadata
region.

Change up the iteration to allow spanning multiple segments. This patch
is mostly a rewrite of the protection information handling to allow
arbitrary alignment, so it's probably easier to review the end result
than the diff.

Note, this strives to be a very general solution that should work in
scenarios that I think are unlikely to ever be encountered in real
life. The original goal was simply to remove the 'lim->dma_alignment'
requirement that protection information was imposing, and it wasn't
much more work to support arbitrary integrity buffers as well.

Signed-off-by: Keith Busch
---
 block/blk-settings.c   |  10 -
 block/t10-pi.c         | 858 ++++++++++++++++++++++++-----------------
 include/linux/t10-pi.h |   4 +-
 3 files changed, 505 insertions(+), 367 deletions(-)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index ccffa3ca85c36..345b6a271cc35 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -184,16 +184,6 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
 	if (!bi->interval_exp)
 		bi->interval_exp = ilog2(lim->logical_block_size);
 
-	/*
-	 * The PI generation / validation helpers do not expect intervals to
-	 * straddle multiple bio_vecs. Enforce alignment so that those are
-	 * never generated, and that each buffer is aligned as expected.
-	 */
-	if (bi->csum_type) {
-		lim->dma_alignment = max(lim->dma_alignment,
-				(1U << bi->interval_exp) - 1);
-	}
-
 	return 0;
 }
 
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 0c4ed97021460..1ead263de206b 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -12,230 +12,157 @@
 #include 
 #include "blk.h"
 
+union pi_tuple {
+	struct crc64_pi_tuple *crc64_pi;
+	struct t10_pi_tuple *t10_pi;
+};
+
 struct blk_integrity_iter {
-	void *prot_buf;
-	void *data_buf;
-	sector_t seed;
-	unsigned int data_size;
-	unsigned short interval;
-	const char *disk_name;
+	struct bio *bio;
+	struct bio_integrity_payload *bip;
+	struct blk_integrity *bi;
+
+	struct bvec_iter data_iter;
+	struct bvec_iter prot_iter;
+	unsigned int interval_remaining;
+	u64 seed;
+	u64 crc;
 };
 
-static __be16 t10_pi_csum(__be16 csum, void *data, unsigned int len,
-		unsigned char csum_type)
+static void blk_crc(struct blk_integrity_iter *iter, void *data, unsigned int len)
 {
-	if (csum_type == BLK_INTEGRITY_CSUM_IP)
-		return (__force __be16)ip_compute_csum(data, len);
-	return cpu_to_be16(crc_t10dif_update(be16_to_cpu(csum), data, len));
+	switch (iter->bi->csum_type) {
+	case BLK_INTEGRITY_CSUM_CRC64:
+		iter->crc = crc64_nvme(iter->crc, data, len);
+		break;
+	case BLK_INTEGRITY_CSUM_CRC:
+		iter->crc = crc_t10dif_update(iter->crc, data, len);
+		break;
+	case BLK_INTEGRITY_CSUM_IP:
+		iter->crc = csum_partial(data, len, iter->crc);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		iter->crc = U64_MAX;
+		break;
+	}
 }
 
-/*
- * Type 1 and Type 2 protection use the same format: 16 bit guard tag,
- * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
- * tag.
+/** + * blk_integrity_crc_offset - update the crc for formats that have metadata + * padding in front of the protection information + * field. */ -static void t10_pi_generate(struct blk_integrity_iter *iter, - struct blk_integrity *bi) +static void blk_integrity_crc_offset(struct blk_integrity_iter *iter) { - u8 offset = bi->pi_offset; - unsigned int i; - - for (i = 0 ; i < iter->data_size ; i += iter->interval) { - struct t10_pi_tuple *pi = iter->prot_buf + offset; - - pi->guard_tag = t10_pi_csum(0, iter->data_buf, iter->interval, - bi->csum_type); - if (offset) - pi->guard_tag = t10_pi_csum(pi->guard_tag, - iter->prot_buf, offset, bi->csum_type); - pi->app_tag = 0; - - if (bi->flags & BLK_INTEGRITY_REF_TAG) - pi->ref_tag = cpu_to_be32(lower_32_bits(iter->seed)); - else - pi->ref_tag = 0; - - iter->data_buf += iter->interval; - iter->prot_buf += bi->metadata_size; - iter->seed++; + unsigned int offset = iter->bi->pi_offset; + + while (offset > 0) { + struct bio_vec pbv = mp_bvec_iter_bvec(iter->bip->bip_vec, + iter->prot_iter); + unsigned int len = min(pbv.bv_len, offset); + void *prot_buf = bvec_kmap_local(&pbv); + + bvec_iter_advance_single(iter->bip->bip_vec, &iter->prot_iter, len); + blk_crc(iter, prot_buf, len); + kunmap_local(prot_buf); + offset -= len; } } -static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, - struct blk_integrity *bi) -{ - u8 offset = bi->pi_offset; - unsigned int i; - - for (i = 0 ; i < iter->data_size ; i += iter->interval) { - struct t10_pi_tuple *pi = iter->prot_buf + offset; - __be16 csum; - - if (bi->flags & BLK_INTEGRITY_REF_TAG) { - if (pi->app_tag == T10_PI_APP_ESCAPE) - goto next; - - if (be32_to_cpu(pi->ref_tag) != - lower_32_bits(iter->seed)) { - pr_err("%s: ref tag error at location %llu " \ - "(rcvd %u)\n", iter->disk_name, - (unsigned long long) - iter->seed, be32_to_cpu(pi->ref_tag)); - return BLK_STS_PROTECTION; - } - } else { - if (pi->app_tag == T10_PI_APP_ESCAPE && - pi->ref_tag == T10_PI_REF_ESCAPE) - goto next; - } - - csum = t10_pi_csum(0, iter->data_buf, iter->interval, - bi->csum_type); - if (offset) - csum = t10_pi_csum(csum, iter->prot_buf, offset, - bi->csum_type); - - if (pi->guard_tag != csum) { - pr_err("%s: guard tag error at sector %llu " \ - "(rcvd %04x, want %04x)\n", iter->disk_name, - (unsigned long long)iter->seed, - be16_to_cpu(pi->guard_tag), be16_to_cpu(csum)); - return BLK_STS_PROTECTION; - } - -next: - iter->data_buf += iter->interval; - iter->prot_buf += bi->metadata_size; - iter->seed++; +static void __blk_integrity_copy_from_tuple(struct bio_integrity_payload *bip, + struct bvec_iter *iter, void *tuple, unsigned int tuple_size) +{ + while (tuple_size) { + struct bio_vec pbv = mp_bvec_iter_bvec(bip->bip_vec, *iter); + unsigned int len = min(tuple_size, pbv.bv_len); + void *prot_buf = bvec_kmap_local(&pbv); + + bvec_iter_advance_single(bip->bip_vec, iter, len); + memcpy(prot_buf, tuple, len); + tuple += len; + tuple_size -= len; + kunmap_local(prot_buf); } - - return BLK_STS_OK; } /** - * t10_pi_type1_prepare - prepare PI prior submitting request to device - * @rq: request with PI that should be prepared - * - * For Type 1/Type 2, the virtual start sector is the one that was - * originally submitted by the block layer for the ref_tag usage. Due to - * partitioning, MD/DM cloning, etc. the actual physical start sector is - * likely to be different. Remap protection information to match the - * physical LBA. 
+ * blk_integrity_copy_from_tuple - copy from @tuple to the @iter protection buffer
  */
-static void t10_pi_type1_prepare(struct request *rq)
+static void blk_integrity_copy_from_tuple(struct blk_integrity_iter *iter,
+		void *tuple)
 {
-	struct blk_integrity *bi = &rq->q->limits.integrity;
-	const int tuple_sz = bi->metadata_size;
-	u32 ref_tag = t10_pi_ref_tag(rq);
-	u8 offset = bi->pi_offset;
-	struct bio *bio;
-
-	__rq_for_each_bio(bio, rq) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-		u32 virt = bip_get_seed(bip) & 0xffffffff;
-		struct bio_vec iv;
-		struct bvec_iter iter;
-
-		/* Already remapped? */
-		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
-			break;
-
-		bip_for_each_vec(iv, bip, iter) {
-			unsigned int j;
-			void *p;
-
-			p = bvec_kmap_local(&iv);
-			for (j = 0; j < iv.bv_len; j += tuple_sz) {
-				struct t10_pi_tuple *pi = p + offset;
-
-				if (be32_to_cpu(pi->ref_tag) == virt)
-					pi->ref_tag = cpu_to_be32(ref_tag);
-				virt++;
-				ref_tag++;
-				p += tuple_sz;
-			}
-			kunmap_local(p);
-		}
+	__blk_integrity_copy_from_tuple(iter->bip, &iter->prot_iter,
+			tuple, iter->bi->pi_tuple_size);
+}
 
-		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
+static void __blk_integrity_copy_to_tuple(struct bio_integrity_payload *bip,
+		struct bvec_iter *iter, void *tuple, unsigned int tuple_size)
+{
+	while (tuple_size) {
+		struct bio_vec pbv = mp_bvec_iter_bvec(bip->bip_vec, *iter);
+		unsigned int len = min(tuple_size, pbv.bv_len);
+		void *prot_buf = bvec_kmap_local(&pbv);
+
+		bvec_iter_advance_single(bip->bip_vec, iter, len);
+		memcpy(tuple, prot_buf, len);
+		tuple += len;
+		tuple_size -= len;
+		kunmap_local(prot_buf);
 	}
 }
 
 /**
- * t10_pi_type1_complete - prepare PI prior returning request to the blk layer
- * @rq: request with PI that should be prepared
- * @nr_bytes: total bytes to prepare
- *
- * For Type 1/Type 2, the virtual start sector is the one that was
- * originally submitted by the block layer for the ref_tag usage. Due to
- * partitioning, MD/DM cloning, etc. the actual physical start sector is
- * likely to be different. Since the physical start sector was submitted
- * to the device, we should remap it back to virtual values expected by the
- * block layer.
+ * blk_integrity_copy_to_tuple - copy to @tuple from the @iter protection buffer
  */
-static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
+static void blk_integrity_copy_to_tuple(struct blk_integrity_iter *iter, void *tuple)
 {
-	struct blk_integrity *bi = &rq->q->limits.integrity;
-	unsigned intervals = nr_bytes >> bi->interval_exp;
-	const int tuple_sz = bi->metadata_size;
-	u32 ref_tag = t10_pi_ref_tag(rq);
-	u8 offset = bi->pi_offset;
-	struct bio *bio;
-
-	__rq_for_each_bio(bio, rq) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-		u32 virt = bip_get_seed(bip) & 0xffffffff;
-		struct bio_vec iv;
-		struct bvec_iter iter;
-
-		bip_for_each_vec(iv, bip, iter) {
-			unsigned int j;
-			void *p;
-
-			p = bvec_kmap_local(&iv);
-			for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
-				struct t10_pi_tuple *pi = p + offset;
-
-				if (be32_to_cpu(pi->ref_tag) == ref_tag)
-					pi->ref_tag = cpu_to_be32(virt);
-				virt++;
-				ref_tag++;
-				intervals--;
-				p += tuple_sz;
-			}
-			kunmap_local(p);
-		}
-	}
+	__blk_integrity_copy_to_tuple(iter->bip, &iter->prot_iter,
+			tuple, iter->bi->pi_tuple_size);
 }
 
-static __be64 ext_pi_crc64(u64 crc, void *data, unsigned int len)
+static void blk_set_ext_pi(void *prot_buf, struct blk_integrity_iter *iter)
 {
-	return cpu_to_be64(crc64_nvme(crc, data, len));
+	struct crc64_pi_tuple *pi = prot_buf;
+
+	if (unlikely((unsigned long)prot_buf & (sizeof(*pi) - 1))) {
+		put_unaligned_be16(0, &pi->app_tag);
+		put_unaligned_be64(iter->crc, &pi->guard_tag);
+		put_unaligned_be48(iter->seed, &pi->ref_tag);
+	} else {
+		pi->app_tag = 0;
+		pi->guard_tag = cpu_to_be64(iter->crc);
+		put_unaligned_be48(iter->seed, &pi->ref_tag);
+	}
 }
 
-static void ext_pi_crc64_generate(struct blk_integrity_iter *iter,
-		struct blk_integrity *bi)
+static void blk_set_t10_pi(void *prot_buf, struct blk_integrity_iter *iter)
 {
-	u8 offset = bi->pi_offset;
-	unsigned int i;
-
-	for (i = 0 ; i < iter->data_size ; i += iter->interval) {
-		struct crc64_pi_tuple *pi = iter->prot_buf + offset;
+	struct t10_pi_tuple *pi = prot_buf;
 
-		pi->guard_tag = ext_pi_crc64(0, iter->data_buf, iter->interval);
-		if (offset)
-			pi->guard_tag = ext_pi_crc64(be64_to_cpu(pi->guard_tag),
-					iter->prot_buf, offset);
+	if (unlikely((unsigned long)prot_buf & (sizeof(*pi) - 1))) {
+		put_unaligned_be16(0, &pi->app_tag);
+		put_unaligned_be16((u16)iter->crc, &pi->guard_tag);
+		put_unaligned_be32((u32)iter->seed, &pi->ref_tag);
+	} else {
 		pi->app_tag = 0;
+		pi->guard_tag = cpu_to_be16((u16)iter->crc);
+		pi->ref_tag = cpu_to_be32((u32)iter->seed);
+	}
+}
 
-		if (bi->flags & BLK_INTEGRITY_REF_TAG)
-			put_unaligned_be48(iter->seed, pi->ref_tag);
-		else
-			put_unaligned_be48(0ULL, pi->ref_tag);
+static void blk_set_ip_pi(void *prot_buf, struct blk_integrity_iter *iter)
+{
+	struct t10_pi_tuple *pi = prot_buf;
 
-		iter->data_buf += iter->interval;
-		iter->prot_buf += bi->metadata_size;
-		iter->seed++;
+	if (unlikely((unsigned long)prot_buf & (sizeof(*pi) - 1))) {
+		put_unaligned_be16(0, &pi->app_tag);
+		__put_unaligned_t(__be16, (__force __be16)(iter->crc), &pi->guard_tag);
+		put_unaligned_be32(iter->seed, &pi->ref_tag);
+	} else {
+		pi->app_tag = 0;
+		pi->guard_tag = (__force __be16)iter->crc;
+		pi->ref_tag = cpu_to_be32(iter->seed);
 	}
 }
 
@@ -247,227 +174,448 @@ static bool ext_pi_ref_escape(const u8 ref_tag[6])
 }
 
 static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,
-		struct blk_integrity *bi)
-{
-	u8 offset = bi->pi_offset;
-	unsigned int i;
-
-	for (i = 0; i < iter->data_size; i += iter->interval) {
-		struct crc64_pi_tuple *pi
= iter->prot_buf + offset; - u64 ref, seed; - __be64 csum; - - if (bi->flags & BLK_INTEGRITY_REF_TAG) { - if (pi->app_tag == T10_PI_APP_ESCAPE) - goto next; - - ref = get_unaligned_be48(pi->ref_tag); - seed = lower_48_bits(iter->seed); - if (ref != seed) { - pr_err("%s: ref tag error at location %llu (rcvd %llu)\n", - iter->disk_name, seed, ref); - return BLK_STS_PROTECTION; - } - } else { - if (pi->app_tag == T10_PI_APP_ESCAPE && - ext_pi_ref_escape(pi->ref_tag)) - goto next; - } + struct crc64_pi_tuple *pi) +{ + u64 guard; + u64 ref; + u16 app; + + if (unlikely((unsigned long)pi & (sizeof(*pi) - 1))) { + app = get_unaligned_be16(&pi->app_tag); + guard = get_unaligned_be64(&pi->guard_tag); + ref = get_unaligned_be48(pi->ref_tag); + } else { + app = be16_to_cpu(pi->app_tag); + guard = be64_to_cpu(pi->guard_tag); + ref = get_unaligned_be48(pi->ref_tag); + } - csum = ext_pi_crc64(0, iter->data_buf, iter->interval); - if (offset) - csum = ext_pi_crc64(be64_to_cpu(csum), iter->prot_buf, - offset); + if (iter->bi->flags & BLK_INTEGRITY_REF_TAG) { + u64 seed = lower_48_bits(iter->seed); - if (pi->guard_tag != csum) { - pr_err("%s: guard tag error at sector %llu " \ - "(rcvd %016llx, want %016llx)\n", - iter->disk_name, (unsigned long long)iter->seed, - be64_to_cpu(pi->guard_tag), be64_to_cpu(csum)); + if (app == T10_PI_APP_ESCAPE) + return BLK_STS_OK; + if (ref != seed) { + pr_err("%s: ref tag error at location %llu (rcvd %llu)\n", + iter->bio->bi_bdev->bd_disk->disk_name, seed, + ref); return BLK_STS_PROTECTION; } + } else if (app == T10_PI_APP_ESCAPE && + ext_pi_ref_escape(pi->ref_tag)) { + return BLK_STS_OK; + } -next: - iter->data_buf += iter->interval; - iter->prot_buf += bi->metadata_size; - iter->seed++; + if (guard != iter->crc) { + pr_err("%s: guard tag error at sector %llu (rcvd %016llx, want %016llx)\n", + iter->bio->bi_bdev->bd_disk->disk_name, iter->seed, + guard, iter->crc); + return BLK_STS_PROTECTION; } return BLK_STS_OK; } -static void ext_pi_type1_prepare(struct request *rq) +static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, + struct t10_pi_tuple *pi) { - struct blk_integrity *bi = &rq->q->limits.integrity; - const int tuple_sz = bi->metadata_size; - u64 ref_tag = ext_pi_ref_tag(rq); - u8 offset = bi->pi_offset; - struct bio *bio; + u16 guard; + u32 ref; + u16 app; + + if (unlikely((unsigned long)pi & (sizeof(*pi) - 1))) { + guard = get_unaligned_be16(&pi->guard_tag); + ref = get_unaligned_be32(&pi->ref_tag); + app = get_unaligned_be16(&pi->app_tag); + } else { + guard = be16_to_cpu(pi->guard_tag); + ref = be32_to_cpu(pi->ref_tag); + app = be16_to_cpu(pi->app_tag); + } - __rq_for_each_bio(bio, rq) { - struct bio_integrity_payload *bip = bio_integrity(bio); - u64 virt = lower_48_bits(bip_get_seed(bip)); - struct bio_vec iv; - struct bvec_iter iter; - - /* Already remapped? 
*/ - if (bip->bip_flags & BIP_MAPPED_INTEGRITY) - break; - - bip_for_each_vec(iv, bip, iter) { - unsigned int j; - void *p; - - p = bvec_kmap_local(&iv); - for (j = 0; j < iv.bv_len; j += tuple_sz) { - struct crc64_pi_tuple *pi = p + offset; - u64 ref = get_unaligned_be48(pi->ref_tag); - - if (ref == virt) - put_unaligned_be48(ref_tag, pi->ref_tag); - virt++; - ref_tag++; - p += tuple_sz; - } - kunmap_local(p); + if (iter->bi->flags & BLK_INTEGRITY_REF_TAG) { + u32 seed = lower_32_bits(iter->seed); + + if (app == T10_PI_APP_ESCAPE) + return BLK_STS_OK; + if (ref != seed) { + pr_err("%s: ref tag error at location %u (rcvd %u)\n", + iter->bio->bi_bdev->bd_disk->disk_name, seed, + ref); + return BLK_STS_PROTECTION; } + } else if (app == T10_PI_APP_ESCAPE && + ref == T10_PI_REF_ESCAPE) { + return BLK_STS_OK; + } - bip->bip_flags |= BIP_MAPPED_INTEGRITY; + if (guard != (u16)iter->crc) { + pr_err("%s: guard tag error at sector %llu (rcvd %04x, want %04x)\n", + iter->bio->bi_bdev->bd_disk->disk_name, iter->seed, + guard, (u16)iter->crc); + return BLK_STS_PROTECTION; } + + return BLK_STS_OK; } -static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes) +static blk_status_t blk_integrity_verify(struct blk_integrity_iter *iter, + void *tuple) { - struct blk_integrity *bi = &rq->q->limits.integrity; - unsigned intervals = nr_bytes >> bi->interval_exp; - const int tuple_sz = bi->metadata_size; - u64 ref_tag = ext_pi_ref_tag(rq); - u8 offset = bi->pi_offset; - struct bio *bio; + switch (iter->bi->csum_type) { + case BLK_INTEGRITY_CSUM_CRC64: + return ext_pi_crc64_verify(iter, tuple); + case BLK_INTEGRITY_CSUM_CRC: + case BLK_INTEGRITY_CSUM_IP: + return t10_pi_verify(iter, tuple); + default: + return BLK_STS_OK; + } +} - __rq_for_each_bio(bio, rq) { - struct bio_integrity_payload *bip = bio_integrity(bio); - u64 virt = lower_48_bits(bip_get_seed(bip)); - struct bio_vec iv; - struct bvec_iter iter; - - bip_for_each_vec(iv, bip, iter) { - unsigned int j; - void *p; - - p = bvec_kmap_local(&iv); - for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) { - struct crc64_pi_tuple *pi = p + offset; - u64 ref = get_unaligned_be48(pi->ref_tag); - - if (ref == ref_tag) - put_unaligned_be48(virt, pi->ref_tag); - virt++; - ref_tag++; - intervals--; - p += tuple_sz; - } - kunmap_local(p); - } +static void blk_integrity_set(struct blk_integrity_iter *iter, + void *tuple) +{ + switch (iter->bi->csum_type) { + case BLK_INTEGRITY_CSUM_CRC64: + return blk_set_ext_pi(tuple, iter); + case BLK_INTEGRITY_CSUM_CRC: + return blk_set_t10_pi(tuple, iter); + case BLK_INTEGRITY_CSUM_IP: + return blk_set_ip_pi(tuple, iter); + default: + WARN_ON_ONCE(1); + return; } } -void blk_integrity_generate(struct bio *bio) +static blk_status_t blk_integrity_interval(struct blk_integrity_iter *iter, bool verify) +{ + blk_status_t ret = BLK_STS_OK; + union pi_tuple tuple; + void *ptuple = &tuple; + struct bio_vec pbv; + + blk_integrity_crc_offset(iter); + pbv = mp_bvec_iter_bvec(iter->bip->bip_vec, iter->prot_iter); + if (pbv.bv_len >= iter->bi->pi_tuple_size) { + ptuple = bvec_kmap_local(&pbv); + bvec_iter_advance_single(iter->bip->bip_vec, &iter->prot_iter, + iter->bi->metadata_size - iter->bi->pi_offset); + } else if (verify) { + blk_integrity_copy_to_tuple(iter, ptuple); + } + + if (verify) + ret = blk_integrity_verify(iter, ptuple); + else + blk_integrity_set(iter, ptuple); + + if (ptuple != &tuple) + kunmap_local(ptuple); + else if (!verify) + blk_integrity_copy_from_tuple(iter, ptuple); + + iter->interval_remaining = 1 
<< iter->bi->interval_exp; + iter->crc = 0; + iter->seed++; + + return ret; +} + +static void blk_integrity_iterate(struct bio *bio, struct bvec_iter *data_iter, + bool verify) { struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); struct bio_integrity_payload *bip = bio_integrity(bio); - struct blk_integrity_iter iter; - struct bvec_iter bviter; - struct bio_vec bv; - - iter.disk_name = bio->bi_bdev->bd_disk->disk_name; - iter.interval = 1 << bi->interval_exp; - iter.seed = bio->bi_iter.bi_sector; - iter.prot_buf = bvec_virt(bip->bip_vec); - bio_for_each_segment(bv, bio, bviter) { + struct blk_integrity_iter iter = { + .bio = bio, + .bip = bip, + .bi = bi, + .data_iter = *data_iter, + .prot_iter = bip->bip_iter, + .interval_remaining = 1 << bi->interval_exp, + .seed = data_iter->bi_sector, + .crc = 0, + }; + blk_status_t ret = BLK_STS_OK; + + while (iter.data_iter.bi_size && ret == BLK_STS_OK) { + struct bio_vec bv = mp_bvec_iter_bvec(iter.bio->bi_io_vec, + iter.data_iter); void *kaddr = bvec_kmap_local(&bv); + void *data = kaddr; + + bvec_iter_advance_single(iter.bio->bi_io_vec, &iter.data_iter, + bv.bv_len); + while (bv.bv_len) { + unsigned int len = min(iter.interval_remaining, bv.bv_len); + + blk_crc(&iter, data, len); + bv.bv_len -= len; + data += len; - iter.data_buf = kaddr; - iter.data_size = bv.bv_len; - switch (bi->csum_type) { - case BLK_INTEGRITY_CSUM_CRC64: - ext_pi_crc64_generate(&iter, bi); - break; - case BLK_INTEGRITY_CSUM_CRC: - case BLK_INTEGRITY_CSUM_IP: - t10_pi_generate(&iter, bi); - break; - default: - break; + iter.interval_remaining -= len; + if (!iter.interval_remaining) + ret = blk_integrity_interval(&iter, verify); } kunmap_local(kaddr); } + + if (ret) + bio->bi_status = ret; +} + +void blk_integrity_generate(struct bio *bio) +{ + struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); + + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_CRC64: + case BLK_INTEGRITY_CSUM_CRC: + case BLK_INTEGRITY_CSUM_IP: + blk_integrity_iterate(bio, &bio->bi_iter, false); + break; + default: + break; + } } void blk_integrity_verify_iter(struct bio *bio, struct bvec_iter *saved_iter) { struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); - struct bio_integrity_payload *bip = bio_integrity(bio); - struct blk_integrity_iter iter; - struct bvec_iter bviter; - struct bio_vec bv; + + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_CRC64: + case BLK_INTEGRITY_CSUM_CRC: + case BLK_INTEGRITY_CSUM_IP: + blk_integrity_iterate(bio, saved_iter, true); + break; + default: + break; + } +} + +/** + * blk_pi_advance_offset - advance @iter past the protection offset + * + * For protection formats that contain front padding on the metadata region. 
+ */ +static void blk_pi_advance_offset(struct blk_integrity *bi, + struct bio_integrity_payload *bip, + struct bvec_iter *iter) +{ + unsigned int offset = bi->pi_offset; + + while (offset > 0) { + struct bio_vec bv = mp_bvec_iter_bvec(bip->bip_vec, *iter); + unsigned int len = min(bv.bv_len, offset); + + bvec_iter_advance_single(bip->bip_vec, iter, len); + offset -= len; + } +} + +static void *blk_tuple_remap_start(union pi_tuple *tuple, struct blk_integrity *bi, + struct bio_integrity_payload *bip, + struct bvec_iter *iter) +{ + struct bvec_iter titer; + struct bio_vec pbv; + + blk_pi_advance_offset(bi, bip, iter); + pbv = mp_bvec_iter_bvec(bip->bip_vec, *iter); + if (likely(pbv.bv_len >= bi->pi_tuple_size)) + return bvec_kmap_local(&pbv); /* - * At the moment verify is called bi_iter has been advanced during split - * and completion, so use the copy created during submission here. + * We need to preserve the state of the original iter for the + * copy_from_tuple at the end, so make a temp iter for here. */ - iter.disk_name = bio->bi_bdev->bd_disk->disk_name; - iter.interval = 1 << bi->interval_exp; - iter.seed = saved_iter->bi_sector; - iter.prot_buf = bvec_virt(bip->bip_vec); - __bio_for_each_segment(bv, bio, bviter, *saved_iter) { - void *kaddr = bvec_kmap_local(&bv); - blk_status_t ret = BLK_STS_OK; - - iter.data_buf = kaddr; - iter.data_size = bv.bv_len; - switch (bi->csum_type) { - case BLK_INTEGRITY_CSUM_CRC64: - ret = ext_pi_crc64_verify(&iter, bi); - break; - case BLK_INTEGRITY_CSUM_CRC: - case BLK_INTEGRITY_CSUM_IP: - ret = t10_pi_verify(&iter, bi); - break; - default: - break; - } - kunmap_local(kaddr); + titer = *iter; + __blk_integrity_copy_to_tuple(bip, &titer, tuple, bi->pi_tuple_size); + return tuple; +} - if (ret) { - bio->bi_status = ret; - return; - } +static void *blk_tuple_remap_end(union pi_tuple *tuple, void *ptuple, + struct blk_integrity *bi, + struct bio_integrity_payload *bip, + struct bvec_iter *iter) +{ + unsigned int len = bi->metadata_size - bi->pi_offset; + + if (likely(ptuple != tuple)) { + kunmap_local(ptuple); + } else { + __blk_integrity_copy_from_tuple(bip, iter, ptuple, + bi->pi_tuple_size); + len -= bi->pi_tuple_size; + } + + bvec_iter_advance(bip->bip_vec, iter, len); + return tuple; +} + +static void blk_set_ext_unmap_ref(void *prot_buf, u64 virt, u64 ref_tag) +{ + struct crc64_pi_tuple *pi = prot_buf; + + if (get_unaligned_be48(&pi->ref_tag) == lower_48_bits(ref_tag)) + put_unaligned_be48(virt, pi->ref_tag); +} + +static void blk_set_t10_unmap_ref(void *prot_buf, u32 virt, u32 ref_tag) +{ + struct t10_pi_tuple *pi = prot_buf; + u32 ref; + + if (unlikely((unsigned long)pi & (sizeof(*pi) - 1))) + ref = get_unaligned_be32(&pi->ref_tag); + else + ref = be32_to_cpu(pi->ref_tag); + + if (ref != ref_tag) + return; + + if (unlikely((unsigned long)pi & (sizeof(*pi) - 1))) + put_unaligned_be32(virt, &pi->ref_tag); + else + pi->ref_tag = cpu_to_be32(virt); +} + +static void blk_reftag_remap_complete(struct blk_integrity *bi, void *tuple, u64 virt, + u64 ref) +{ + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_CRC64: + blk_set_ext_unmap_ref(tuple, virt, ref); + break; + case BLK_INTEGRITY_CSUM_CRC: + case BLK_INTEGRITY_CSUM_IP: + blk_set_t10_unmap_ref(tuple, virt, ref); + break; + default: + WARN_ON_ONCE(1); + break; } } +static void blk_set_ext_map_ref(void *prot_buf, u64 virt, u64 ref_tag) +{ + struct crc64_pi_tuple *pi = prot_buf; + + if (get_unaligned_be48(&pi->ref_tag) == lower_48_bits(virt)) + put_unaligned_be48(ref_tag, pi->ref_tag); +} + +static 
void blk_set_t10_map_ref(void *prot_buf, u32 virt, u32 ref_tag) +{ + struct t10_pi_tuple *pi = prot_buf; + u32 ref; + + if (unlikely((unsigned long)pi & (sizeof(*pi) - 1))) + ref = get_unaligned_be32(&pi->ref_tag); + else + ref = be32_to_cpu(pi->ref_tag); + + if (ref != virt) + return; + + if (unlikely((unsigned long)pi & (sizeof(*pi) - 1))) + put_unaligned_be32(ref_tag, &pi->ref_tag); + else + pi->ref_tag = cpu_to_be32(ref_tag); +} + +static void blk_reftag_remap_prepare(struct blk_integrity *bi, void *tuple, u64 virt, + u64 ref) +{ + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_CRC64: + blk_set_ext_map_ref(tuple, virt, ref); + break; + case BLK_INTEGRITY_CSUM_CRC: + case BLK_INTEGRITY_CSUM_IP: + blk_set_t10_map_ref(tuple, virt, ref); + break; + default: + WARN_ON_ONCE(1); + break; + } +} + +static void blk_reftag_remap(struct bio *bio, struct blk_integrity *bi, + u64 *ref, bool prep) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + struct bvec_iter iter = bip->bip_iter; + u64 virt = bip_get_seed(bip); + union pi_tuple tuple; + void *ptuple; + + while (iter.bi_size) { + ptuple = blk_tuple_remap_start(&tuple, bi, bip, &iter); + + if (prep) + blk_reftag_remap_prepare(bi, ptuple, virt, *ref); + else + blk_reftag_remap_complete(bi, ptuple, virt, *ref); + + blk_tuple_remap_end(&tuple, ptuple, bi, bip, &iter); + (*ref)++; + virt++; + } +} + +static inline unsigned int pi_shift(struct request_queue *q) +{ + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && + q->limits.integrity.interval_exp) + return q->limits.integrity.interval_exp; + else + return ilog2(queue_logical_block_size(q)); +} + +static inline u64 pi_ref_tag(struct request *rq) +{ + return blk_rq_pos(rq) >> (pi_shift(rq->q) - SECTOR_SHIFT); +} + void blk_integrity_prepare(struct request *rq) { struct blk_integrity *bi = &rq->q->limits.integrity; + unsigned int shift = pi_shift(rq->q); + u64 ref = pi_ref_tag(rq); + struct bio *bio; if (!(bi->flags & BLK_INTEGRITY_REF_TAG)) return; - if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC64) - ext_pi_type1_prepare(rq); - else - t10_pi_type1_prepare(rq); + __rq_for_each_bio(bio, rq) { + struct bio_integrity_payload *bip = bio_integrity(bio); + + if (bip->bip_flags & BIP_MAPPED_INTEGRITY) { + ref += bio->bi_iter.bi_size >> shift; + continue; + } + + blk_reftag_remap(bio, bi, &ref, true); + bip->bip_flags |= BIP_MAPPED_INTEGRITY; + } } +/* + * This MUST be called before any bio_advance occurs on the request. + */ void blk_integrity_complete(struct request *rq, unsigned int nr_bytes) { struct blk_integrity *bi = &rq->q->limits.integrity; + u64 ref = pi_ref_tag(rq); + struct bio *bio; if (!(bi->flags & BLK_INTEGRITY_REF_TAG)) return; - if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC64) - ext_pi_type1_complete(rq, nr_bytes); - else - t10_pi_type1_complete(rq, nr_bytes); + __rq_for_each_bio(bio, rq) + blk_reftag_remap(bio, bi, &ref, false); } diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index 2c59fe3efcd42..7003ea2c78d18 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -34,8 +34,8 @@ struct t10_pi_tuple { __be32 ref_tag; /* Target LBA or indirect LBA */ }; -#define T10_PI_APP_ESCAPE cpu_to_be16(0xffff) -#define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff) +#define T10_PI_APP_ESCAPE 0xffff +#define T10_PI_REF_ESCAPE 0xffffffff static inline u32 t10_pi_ref_tag(struct request *rq) { -- 2.47.3