When dax_fc_bytelog is enabled, write fast commit TLVs directly into the
DAX-mapped ByteLog ring. Keep traditional TLV writes confined to the
reserved FC block and emit an anchor TLV to describe the ByteLog window.

Signed-off-by: Li Chen
---
 fs/ext4/fast_commit.c         | 124 +++++++++++++++++++++++++++++++++-
 fs/ext4/fast_commit.h         |  13 ++++
 fs/ext4/fast_commit_bytelog.c |  20 ++++++
 fs/ext4/fast_commit_bytelog.h |   5 ++
 4 files changed, 159 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 64c0c4ba58b0..2f7b7ea29df2 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -723,6 +723,12 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
 	 * leaving enough space for a PAD tlv.
 	 */
 	remaining = bsize - EXT4_FC_TAG_BASE_LEN - off;
+	if (ext4_fc_bytelog_active(sbi) && len > remaining) {
+		ext4_fc_mark_ineligible(sb,
+					EXT4_FC_REASON_BYTELOG_TLV_OVERFLOW,
+					NULL);
+		return NULL;
+	}
 	if (len <= remaining) {
 		sbi->s_fc_bytes += len;
 		return dst;
@@ -806,6 +812,31 @@ static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
 	struct ext4_fc_tl tl;
 	u8 *dst;
 
+	if (ext4_fc_bytelog_active(EXT4_SB(sb)) &&
+	    (tag == EXT4_FC_TAG_ADD_RANGE || tag == EXT4_FC_TAG_DEL_RANGE ||
+	     tag == EXT4_FC_TAG_LINK || tag == EXT4_FC_TAG_UNLINK ||
+	     tag == EXT4_FC_TAG_CREAT || tag == EXT4_FC_TAG_INODE)) {
+		struct ext4_fc_bytelog_vec vecs[2];
+		int ret;
+
+		tl.fc_tag = cpu_to_le16(tag);
+		tl.fc_len = cpu_to_le16(len);
+		vecs[0].base = &tl;
+		vecs[0].len = sizeof(tl);
+		vecs[1].base = val;
+		vecs[1].len = len;
+
+		ret = ext4_fc_bytelog_append_vec(sb, tag, vecs,
+						 ARRAY_SIZE(vecs));
+		if (!ret)
+			return true;
+		if (ret == -ENOSPC)
+			ext4_fc_mark_ineligible(sb,
+						EXT4_FC_REASON_BYTELOG_TLV_OVERFLOW,
+						NULL);
+		return false;
+	}
+
 	dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + len, crc);
 	if (!dst)
 		return false;
@@ -819,6 +850,17 @@ static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
 	return true;
 }
 
+static bool ext4_fc_add_bytelog_anchor_tlv(struct super_block *sb,
+					   struct ext4_fc_bytelog_anchor *anchor,
+					   u32 *crc)
+{
+	struct ext4_fc_bytelog_entry entry;
+
+	ext4_fc_bytelog_anchor_to_disk(&entry, anchor);
+	return ext4_fc_add_tlv(sb, EXT4_FC_TAG_DAX_BYTELOG_ANCHOR,
+			       sizeof(entry), (u8 *)&entry, crc);
+}
+
 /* Same as above, but adds dentry tlv. */
 static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
 				   struct ext4_fc_dentry_update *fc_dentry)
@@ -826,9 +868,40 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
 	struct ext4_fc_dentry_info fcd;
 	struct ext4_fc_tl tl;
 	int dlen = fc_dentry->fcd_name.name.len;
-	u8 *dst = ext4_fc_reserve_space(sb,
-		EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc);
+	u8 *dst;
+
+	if (ext4_fc_bytelog_active(EXT4_SB(sb)) &&
+	    (fc_dentry->fcd_op == EXT4_FC_TAG_LINK ||
+	     fc_dentry->fcd_op == EXT4_FC_TAG_UNLINK ||
+	     fc_dentry->fcd_op == EXT4_FC_TAG_CREAT)) {
+		struct ext4_fc_bytelog_vec vecs[3];
+		int ret;
+
+		fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent);
+		fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino);
+		tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op);
+		tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
+
+		vecs[0].base = &tl;
+		vecs[0].len = sizeof(tl);
+		vecs[1].base = &fcd;
+		vecs[1].len = sizeof(fcd);
+		vecs[2].base = fc_dentry->fcd_name.name.name;
+		vecs[2].len = dlen;
+
+		ret = ext4_fc_bytelog_append_vec(sb, fc_dentry->fcd_op, vecs,
+						 ARRAY_SIZE(vecs));
+		if (!ret)
+			return true;
+		if (ret == -ENOSPC)
+			ext4_fc_mark_ineligible(sb,
+						EXT4_FC_REASON_BYTELOG_TLV_OVERFLOW,
+						NULL);
+		return false;
+	}
 
+	dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + sizeof(fcd) +
+				    dlen, crc);
 	if (!dst)
 		return false;
 
@@ -872,6 +945,25 @@ static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
 	tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
 
+	if (ext4_fc_bytelog_active(EXT4_SB(inode->i_sb))) {
+		struct ext4_fc_bytelog_vec vecs[3];
+
+		vecs[0].base = &tl;
+		vecs[0].len = sizeof(tl);
+		vecs[1].base = &fc_inode.fc_ino;
+		vecs[1].len = sizeof(fc_inode.fc_ino);
+		vecs[2].base = ext4_raw_inode(&iloc);
+		vecs[2].len = inode_len;
+
+		ret = ext4_fc_bytelog_append_vec(inode->i_sb, EXT4_FC_TAG_INODE,
+						 vecs, ARRAY_SIZE(vecs));
+		if (ret == -ENOSPC)
+			ext4_fc_mark_ineligible(inode->i_sb,
+						EXT4_FC_REASON_BYTELOG_TLV_OVERFLOW,
+						NULL);
+		goto err;
+	}
+
 	ret = -ECANCELED;
 	dst = ext4_fc_reserve_space(inode->i_sb,
 		EXT4_FC_TAG_BASE_LEN + inode_len + sizeof(fc_inode.fc_ino), crc);
@@ -1147,6 +1239,8 @@ static int ext4_fc_perform_commit(journal_t *journal)
 	}
 
 	/* Step 6.2: Now write all the dentry updates. */
+	if (ext4_fc_bytelog_active(sbi))
+		ext4_fc_bytelog_begin_commit(sb);
 	ret = ext4_fc_commit_dentry_updates(journal, &crc);
 	if (ret)
 		goto out;
@@ -1164,6 +1258,22 @@ static int ext4_fc_perform_commit(journal_t *journal)
 		if (ret)
 			goto out;
 	}
+
+	if (ext4_fc_bytelog_active(sbi)) {
+		struct ext4_fc_bytelog_anchor anchor;
+
+		ret = ext4_fc_bytelog_end_commit(sb);
+		if (ret)
+			goto out;
+		if (sbi->s_fc_bytelog.seq) {
+			ext4_fc_bytelog_build_anchor(sb, &anchor,
+					sbi->s_journal->j_running_transaction->t_tid);
+			if (!ext4_fc_add_bytelog_anchor_tlv(sb, &anchor, &crc)) {
+				ret = -ENOSPC;
+				goto out;
+			}
+		}
+	}
 	/* Step 6.4: Finally write tail tag to conclude this fast commit. */
 	ret = ext4_fc_write_tail(sb, crc);
 
@@ -1262,6 +1372,12 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
 	else
 		journal_ioprio = EXT4_DEF_JOURNAL_IOPRIO;
 	set_task_ioprio(current, journal_ioprio);
+
+	if (ext4_fc_bytelog_active(sbi)) {
+		journal->j_fc_off = 0;
+		sbi->s_fc_bytes = 0;
+	}
+
 	fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
 	ret = ext4_fc_perform_commit(journal);
 	if (ret < 0) {
@@ -1367,8 +1483,9 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
 		ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
 	}
 
-	if (full)
+	if (full || ext4_fc_bytelog_active(sbi))
 		sbi->s_fc_bytes = 0;
+	ext4_fc_bytelog_reset(sb, full);
 	ext4_fc_unlock(sb, alloc_ctx);
 	trace_ext4_fc_stats(sb);
 }
@@ -2315,6 +2432,7 @@ static const char * const fc_ineligible_reasons[] = {
 	[EXT4_FC_REASON_FALLOC_RANGE] = "Falloc range op",
 	[EXT4_FC_REASON_INODE_JOURNAL_DATA] = "Data journalling",
 	[EXT4_FC_REASON_ENCRYPTED_FILENAME] = "Encrypted filename",
+	[EXT4_FC_REASON_BYTELOG_TLV_OVERFLOW] = "ByteLog TLV overflow",
 	[EXT4_FC_REASON_MIGRATE] = "Inode format migration",
 	[EXT4_FC_REASON_VERITY] = "fs-verity enable",
 	[EXT4_FC_REASON_MOVE_EXT] = "Move extents",
diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h
index 2f77a37fb101..fb51e19b9778 100644
--- a/fs/ext4/fast_commit.h
+++ b/fs/ext4/fast_commit.h
@@ -18,6 +18,7 @@
 #define EXT4_FC_TAG_PAD			0x0007
 #define EXT4_FC_TAG_TAIL		0x0008
 #define EXT4_FC_TAG_HEAD		0x0009
+#define EXT4_FC_TAG_DAX_BYTELOG_ANCHOR	0x000a
 
 #define EXT4_FC_SUPPORTED_FEATURES	0x0
 
@@ -70,6 +71,15 @@ struct ext4_fc_tail {
 	__le32 fc_crc;
 };
 
+/* Value structure for tag EXT4_FC_TAG_DAX_BYTELOG_ANCHOR. */
+struct ext4_fc_bytelog_entry {
+	__le64 fc_head;	/* 64-bit fields lead: no implicit padding, */
+	__le64 fc_tail;	/* so the 32-byte layout is ABI-independent */
+	__le64 fc_seq;	/* and no uninitialised bytes are emitted.  */
+	__le32 fc_tid;
+	__le32 fc_crc;
+};
+
 /* Tag base length */
 #define EXT4_FC_TAG_BASE_LEN (sizeof(struct ext4_fc_tl))
 
@@ -97,6 +107,7 @@ enum {
 	EXT4_FC_REASON_FALLOC_RANGE,
 	EXT4_FC_REASON_INODE_JOURNAL_DATA,
 	EXT4_FC_REASON_ENCRYPTED_FILENAME,
+	EXT4_FC_REASON_BYTELOG_TLV_OVERFLOW,
 	EXT4_FC_REASON_MIGRATE,
 	EXT4_FC_REASON_VERITY,
 	EXT4_FC_REASON_MOVE_EXT,
@@ -181,6 +192,8 @@ static inline const char *tag2str(__u16 tag)
 		return "TAIL";
 	case EXT4_FC_TAG_HEAD:
 		return "HEAD";
+	case EXT4_FC_TAG_DAX_BYTELOG_ANCHOR:
+		return "BYTELOG_ANCHOR";
 	default:
 		return "ERROR";
 	}
diff --git a/fs/ext4/fast_commit_bytelog.c b/fs/ext4/fast_commit_bytelog.c
index 64ba3edddbcb..77ac1d9ef031 100644
--- a/fs/ext4/fast_commit_bytelog.c
+++ b/fs/ext4/fast_commit_bytelog.c
@@ -455,6 +455,26 @@ void ext4_fc_bytelog_release(struct super_block *sb)
 	memset(&sbi->s_fc_bytelog, 0, sizeof(sbi->s_fc_bytelog));
 }
 
+void ext4_fc_bytelog_anchor_to_disk(struct ext4_fc_bytelog_entry *dst,
+				    const struct ext4_fc_bytelog_anchor *src)
+{
+	dst->fc_tid = cpu_to_le32(src->tid);
+	dst->fc_head = cpu_to_le64(src->head);
+	dst->fc_tail = cpu_to_le64(src->tail);
+	dst->fc_seq = cpu_to_le64(src->seq);
+	dst->fc_crc = cpu_to_le32(src->crc);
+}
+
+void ext4_fc_bytelog_anchor_from_disk(struct ext4_fc_bytelog_anchor *dst,
+				      const struct ext4_fc_bytelog_entry *src)
+{
+	dst->tid = le32_to_cpu(src->fc_tid);
+	dst->head = le64_to_cpu(src->fc_head);
+	dst->tail = le64_to_cpu(src->fc_tail);
+	dst->seq = le64_to_cpu(src->fc_seq);
+	dst->crc = le32_to_cpu(src->fc_crc);
+}
+
 void ext4_fc_bytelog_reset(struct super_block *sb, bool full)
 {
 	struct ext4_fc_bytelog *log = &EXT4_SB(sb)->s_fc_bytelog;
diff --git a/fs/ext4/fast_commit_bytelog.h b/fs/ext4/fast_commit_bytelog.h
index d52754890222..d3e5b734a02e 100644
--- a/fs/ext4/fast_commit_bytelog.h
+++ b/fs/ext4/fast_commit_bytelog.h
@@ -9,6 +9,7 @@
 struct super_block;
 struct journal_s;
 struct ext4_sb_info;
+struct ext4_fc_bytelog_entry;
 
 #define EXT4_FC_BYTELOG_MAGIC 0x4c424346 /* "FCBL" */
 #define EXT4_FC_BYTELOG_VERSION 1
@@ -109,6 +110,10 @@ int ext4_fc_bytelog_append_vec(struct super_block *sb, u16 tag,
 void ext4_fc_bytelog_build_anchor(struct super_block *sb,
 				  struct ext4_fc_bytelog_anchor *anchor,
 				  u32 tid);
+void ext4_fc_bytelog_anchor_to_disk(struct ext4_fc_bytelog_entry *dst,
+				    const struct ext4_fc_bytelog_anchor *src);
+void ext4_fc_bytelog_anchor_from_disk(struct ext4_fc_bytelog_anchor *dst,
+				      const struct ext4_fc_bytelog_entry *src);
 
 static inline bool ext4_fc_bytelog_record_committed(const struct ext4_fc_bytelog_hdr *hdr)
 {
-- 
2.52.0