1. Introduce CEPH_SUBVOLUME_ID_NONE constant (value 0) to make the unknown/unset state explicit and self-documenting. 2. Add WARN_ON_ONCE if attempting to change an already-set subvolume_id. An inode's subvolume membership is immutable - once created in a subvolume, it stays there. Attempting to change it indicates a bug. --- fs/ceph/inode.c | 32 +++++++++++++++++++++++++------- fs/ceph/mds_client.c | 5 +---- fs/ceph/subvolume_metrics.c | 7 ++++--- fs/ceph/super.h | 10 +++++++++- 4 files changed, 39 insertions(+), 15 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 835049004047..257b3e27b741 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -638,7 +638,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_max_bytes = 0; ci->i_max_files = 0; - ci->i_subvolume_id = 0; + ci->i_subvolume_id = CEPH_SUBVOLUME_ID_NONE; memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); memset(&ci->i_cached_layout, 0, sizeof(ci->i_cached_layout)); @@ -743,7 +743,7 @@ void ceph_evict_inode(struct inode *inode) percpu_counter_dec(&mdsc->metric.total_inodes); - ci->i_subvolume_id = 0; + ci->i_subvolume_id = CEPH_SUBVOLUME_ID_NONE; netfs_wait_for_outstanding_io(inode); truncate_inode_pages_final(&inode->i_data); @@ -877,19 +877,37 @@ int ceph_fill_file_size(struct inode *inode, int issued, } /* - * Set the subvolume ID for an inode. Following the FUSE client convention, - * 0 means unknown/unset (MDS only sends non-zero IDs for subvolume inodes). + * Set the subvolume ID for an inode. + * + * The subvolume_id identifies which CephFS subvolume this inode belongs to. + * CEPH_SUBVOLUME_ID_NONE (0) means unknown/unset - the MDS only sends + * non-zero IDs for inodes within subvolumes. + * + * An inode's subvolume membership is immutable - once an inode is created + * in a subvolume, it stays there. Therefore, if we already have a valid + * (non-zero) subvolume_id and receive a different one, that indicates a bug. */ void ceph_inode_set_subvolume(struct inode *inode, u64 subvolume_id) { struct ceph_inode_info *ci; + u64 old; - if (!inode || !subvolume_id) + if (!inode || subvolume_id == CEPH_SUBVOLUME_ID_NONE) return; ci = ceph_inode(inode); - if (READ_ONCE(ci->i_subvolume_id) != subvolume_id) - WRITE_ONCE(ci->i_subvolume_id, subvolume_id); + old = READ_ONCE(ci->i_subvolume_id); + + if (old == subvolume_id) + return; + + if (old != CEPH_SUBVOLUME_ID_NONE) { + /* subvolume_id should not change once set */ + WARN_ON_ONCE(1); + return; + } + + WRITE_ONCE(ci->i_subvolume_id, subvolume_id); } void ceph_fill_file_time(struct inode *inode, int issued, diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 2b831f48c844..f2a17e11fcef 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -122,10 +122,7 @@ static int parse_reply_info_in(void **p, void *end, u32 struct_len = 0; struct ceph_client *cl = mdsc ? mdsc->fsc->client : NULL; - info->subvolume_id = 0; - doutc(cl, "subv_metric parse start features=0x%llx\n", features); - - info->subvolume_id = 0; + info->subvolume_id = CEPH_SUBVOLUME_ID_NONE; if (features == (u64)-1) { ceph_decode_8_safe(p, end, struct_v, bad); diff --git a/fs/ceph/subvolume_metrics.c b/fs/ceph/subvolume_metrics.c index 111f6754e609..37cbed5b52c3 100644 --- a/fs/ceph/subvolume_metrics.c +++ b/fs/ceph/subvolume_metrics.c @@ -136,8 +136,9 @@ void ceph_subvolume_metrics_record(struct ceph_subvolume_metrics_tracker *tracke struct ceph_subvol_metric_rb_entry *entry, *new_entry = NULL; bool retry = false; - /* 0 means unknown/unset subvolume (matches FUSE client convention) */ - if (!READ_ONCE(tracker->enabled) || !subvol_id || !size || !latency_us) + /* CEPH_SUBVOLUME_ID_NONE (0) means unknown/unset subvolume */ + if (!READ_ONCE(tracker->enabled) || + subvol_id == CEPH_SUBVOLUME_ID_NONE || !size || !latency_us) return; do { @@ -403,7 +404,7 @@ void ceph_subvolume_metrics_record_io(struct ceph_mds_client *mdsc, } subvol_id = READ_ONCE(ci->i_subvolume_id); - if (!subvol_id) { + if (subvol_id == CEPH_SUBVOLUME_ID_NONE) { atomic64_inc(&tracker->record_no_subvol); return; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index a03c373efd52..731df0fcbcc8 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -386,7 +386,15 @@ struct ceph_inode_info { /* quotas */ u64 i_max_bytes, i_max_files; - u64 i_subvolume_id; /* 0 = unknown/unset, matches FUSE client */ + + /* + * Subvolume ID this inode belongs to. CEPH_SUBVOLUME_ID_NONE (0) + * means unknown/unset, matching the FUSE client convention. + * Once set to a valid (non-zero) value, it should not change + * during the inode's lifetime. + */ +#define CEPH_SUBVOLUME_ID_NONE 0 + u64 i_subvolume_id; s32 i_dir_pin; -- 2.34.1