jbd2_inode fields are updated under journal->j_list_lock, but some paths read them without holding the lock (e.g. fast commit helpers and ordered truncate helpers). READ_ONCE() alone is not sufficient for the dirty range fields when they are stored as loff_t because 32-bit platforms can observe torn loads. Store the dirty range in PAGE_SIZE units as pgoff_t instead. Use READ_ONCE() on the read side and WRITE_ONCE() on the write side for the dirty range and i_flags to match the existing lockless access pattern. Suggested-by: Jan Kara Reviewed-by: Jan Kara Signed-off-by: Li Chen --- Changes since v2: - Rename i_dirty_start/end to i_dirty_start_page/end_page. - Use jbd2_jinode_get_dirty_range() for byte conversions in commit paths. fs/jbd2/commit.c | 56 +++++++++++++++++++++++++++++++++---------- fs/jbd2/journal.c | 5 ++-- fs/jbd2/transaction.c | 20 ++++++++++------ include/linux/jbd2.h | 31 ++++++++++++++---------- 4 files changed, 77 insertions(+), 35 deletions(-) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7203d2d2624d7..514f204aa1db1 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -180,7 +180,13 @@ static int journal_wait_on_commit_record(journal_t *journal, /* Send all the data buffers related to an inode */ int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode) { - if (!jinode || !(jinode->i_flags & JI_WRITE_DATA)) + unsigned long flags; + + if (!jinode) + return 0; + + flags = READ_ONCE(jinode->i_flags); + if (!(flags & JI_WRITE_DATA)) return 0; trace_jbd2_submit_inode_data(jinode->i_vfs_inode); @@ -191,12 +197,30 @@ EXPORT_SYMBOL(jbd2_submit_inode_data); int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode) { - if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) || - !jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping) + struct address_space *mapping; + struct inode *inode; + unsigned long flags; + loff_t start_byte, end_byte; + + if (!jinode) + return 0; + + flags = READ_ONCE(jinode->i_flags); + if (!(flags & JI_WAIT_DATA)) + return 0; + + inode = jinode->i_vfs_inode; + if (!inode) + return 0; + + mapping = inode->i_mapping; + if (!mapping) + return 0; + + if (!jbd2_jinode_get_dirty_range(jinode, &start_byte, &end_byte)) return 0; return filemap_fdatawait_range_keep_errors( - jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start, - jinode->i_dirty_end); + mapping, start_byte, end_byte); } EXPORT_SYMBOL(jbd2_wait_inode_data); @@ -218,7 +242,8 @@ static int journal_submit_data_buffers(journal_t *journal, list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { if (!(jinode->i_flags & JI_WRITE_DATA)) continue; - jinode->i_flags |= JI_COMMIT_RUNNING; + WRITE_ONCE(jinode->i_flags, + jinode->i_flags | JI_COMMIT_RUNNING); spin_unlock(&journal->j_list_lock); /* submit the inode data buffers. */ trace_jbd2_submit_inode_data(jinode->i_vfs_inode); @@ -229,7 +254,8 @@ static int journal_submit_data_buffers(journal_t *journal, } spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); - jinode->i_flags &= ~JI_COMMIT_RUNNING; + WRITE_ONCE(jinode->i_flags, + jinode->i_flags & ~JI_COMMIT_RUNNING); smp_mb(); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); } @@ -240,10 +266,13 @@ static int journal_submit_data_buffers(journal_t *journal, int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode) { struct address_space *mapping = jinode->i_vfs_inode->i_mapping; + loff_t start_byte, end_byte; + + if (!jbd2_jinode_get_dirty_range(jinode, &start_byte, &end_byte)) + return 0; return filemap_fdatawait_range_keep_errors(mapping, - jinode->i_dirty_start, - jinode->i_dirty_end); + start_byte, end_byte); } /* @@ -262,7 +291,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal, list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { if (!(jinode->i_flags & JI_WAIT_DATA)) continue; - jinode->i_flags |= JI_COMMIT_RUNNING; + WRITE_ONCE(jinode->i_flags, jinode->i_flags | JI_COMMIT_RUNNING); spin_unlock(&journal->j_list_lock); /* wait for the inode data buffers writeout. */ if (journal->j_finish_inode_data_buffers) { @@ -272,7 +301,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal, } cond_resched(); spin_lock(&journal->j_list_lock); - jinode->i_flags &= ~JI_COMMIT_RUNNING; + WRITE_ONCE(jinode->i_flags, jinode->i_flags & ~JI_COMMIT_RUNNING); smp_mb(); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); } @@ -288,8 +317,9 @@ static int journal_finish_inode_data_buffers(journal_t *journal, &jinode->i_transaction->t_inode_list); } else { jinode->i_transaction = NULL; - jinode->i_dirty_start = 0; - jinode->i_dirty_end = 0; + WRITE_ONCE(jinode->i_dirty_start_page, 0); + WRITE_ONCE(jinode->i_dirty_end_page, + JBD2_INODE_DIRTY_RANGE_NONE); } } spin_unlock(&journal->j_list_lock); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index c973162d5b316..eb26c3088a164 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -3020,8 +3020,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode) jinode->i_next_transaction = NULL; jinode->i_vfs_inode = inode; jinode->i_flags = 0; - jinode->i_dirty_start = 0; - jinode->i_dirty_end = 0; + jinode->i_dirty_start_page = 0; + jinode->i_dirty_end_page = JBD2_INODE_DIRTY_RANGE_NONE; INIT_LIST_HEAD(&jinode->i_list); } @@ -3178,4 +3178,3 @@ MODULE_DESCRIPTION("Generic filesystem journal-writing module"); MODULE_LICENSE("GPL"); module_init(journal_init); module_exit(journal_exit); - diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index dca4b5d8aaaa3..f5226b6d47d24 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2646,6 +2646,7 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, { transaction_t *transaction = handle->h_transaction; journal_t *journal; + pgoff_t start_page, end_page; if (is_handle_aborted(handle)) return -EROFS; @@ -2654,15 +2655,20 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, jbd2_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, transaction->t_tid); + start_page = (pgoff_t)(start_byte >> PAGE_SHIFT); + end_page = (pgoff_t)(end_byte >> PAGE_SHIFT); + spin_lock(&journal->j_list_lock); - jinode->i_flags |= flags; + WRITE_ONCE(jinode->i_flags, jinode->i_flags | flags); - if (jinode->i_dirty_end) { - jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte); - jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte); + if (jinode->i_dirty_end_page != JBD2_INODE_DIRTY_RANGE_NONE) { + WRITE_ONCE(jinode->i_dirty_start_page, + min(jinode->i_dirty_start_page, start_page)); + WRITE_ONCE(jinode->i_dirty_end_page, + max(jinode->i_dirty_end_page, end_page)); } else { - jinode->i_dirty_start = start_byte; - jinode->i_dirty_end = end_byte; + WRITE_ONCE(jinode->i_dirty_start_page, start_page); + WRITE_ONCE(jinode->i_dirty_end_page, end_page); } /* Is inode already attached where we need it? */ @@ -2739,7 +2745,7 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal, int ret = 0; /* This is a quick check to avoid locking if not necessary */ - if (!jinode->i_transaction) + if (!READ_ONCE(jinode->i_transaction)) goto out; /* Locks are here just to force reading of recent values, it is * enough that the transaction was not committing before we started diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 64392baf5f4b4..4fffbd13d38d4 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -390,6 +390,8 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) /* Wait for outstanding data writes for this inode before commit */ #define JI_WAIT_DATA (1 << __JI_WAIT_DATA) +#define JBD2_INODE_DIRTY_RANGE_NONE ((pgoff_t)-1) + /** * struct jbd2_inode - The jbd_inode type is the structure linking inodes in * ordered mode present in a transaction so that we can sync them during commit. @@ -429,33 +431,38 @@ struct jbd2_inode { unsigned long i_flags; /** - * @i_dirty_start: + * @i_dirty_start_page: + * + * Dirty range start in PAGE_SIZE units. + * + * The dirty range is empty if @i_dirty_end_page is set to + * %JBD2_INODE_DIRTY_RANGE_NONE. * - * Offset in bytes where the dirty range for this inode starts. * [j_list_lock] */ - loff_t i_dirty_start; + pgoff_t i_dirty_start_page; /** - * @i_dirty_end: + * @i_dirty_end_page: * - * Inclusive offset in bytes where the dirty range for this inode - * ends. [j_list_lock] + * Dirty range end in PAGE_SIZE units (inclusive). + * + * [j_list_lock] */ - loff_t i_dirty_end; + pgoff_t i_dirty_end_page; }; static inline bool jbd2_jinode_get_dirty_range(const struct jbd2_inode *jinode, loff_t *start, loff_t *end) { - loff_t start_byte = jinode->i_dirty_start; - loff_t end_byte = jinode->i_dirty_end; + pgoff_t start_page = READ_ONCE(jinode->i_dirty_start_page); + pgoff_t end_page = READ_ONCE(jinode->i_dirty_end_page); - if (!end_byte) + if (end_page == JBD2_INODE_DIRTY_RANGE_NONE) return false; - *start = start_byte; - *end = end_byte; + *start = (loff_t)start_page << PAGE_SHIFT; + *end = ((loff_t)end_page << PAGE_SHIFT) + PAGE_SIZE - 1; return true; } -- 2.52.0