Offload XFS writeback to per-AG workers based on the inode dirty-AG
bitmap. Each worker scans and submits writeback only for folios
belonging to its AG.

Signed-off-by: Kundan Kumar
Signed-off-by: Anuj Gupta
---
 fs/xfs/xfs_aops.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 178 insertions(+)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 9d5b65922cd2..55c3154fb2b5 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -678,6 +678,180 @@ xfs_zoned_writeback_submit(
 	return 0;
 }
 
+static bool xfs_agp_match(struct xfs_inode *ip, pgoff_t index,
+		xfs_agnumber_t agno)
+{
+	void *ent;
+	u32 v;
+	bool match = false;
+
+	ent = xa_load(&ip->i_ag_pmap, index);
+	if (ent && xa_is_value(ent)) {
+		v = xa_to_value(ent);
+		if (xfs_agp_valid(v))
+			match = (xfs_agp_agno(v) == (u32)agno);
+	}
+
+	return match;
+}
+
+static bool xfs_folio_matches_ag(struct folio *folio, xfs_agnumber_t agno)
+{
+	struct xfs_inode *ip = XFS_I(folio_mapping(folio)->host);
+
+	return xfs_agp_match(ip, folio->index, agno);
+}
+
+static int xfs_writepages_ag(struct xfs_inode *ip,
+		struct writeback_control *wbc,
+		xfs_agnumber_t agno)
+{
+	struct inode *inode = VFS_I(ip);
+	struct address_space *mapping = inode->i_mapping;
+	struct folio_batch *fbatch = &wbc->fbatch;
+	int ret = 0;
+	pgoff_t index, end;
+
+	wbc->range_cyclic = 0;
+
+	folio_batch_init(fbatch);
+	index = wbc->range_start >> PAGE_SHIFT;
+	end = wbc->range_end >> PAGE_SHIFT;
+
+	struct xfs_writepage_ctx wpc = {
+		.ctx = {
+			.inode = inode,
+			.wbc = wbc,
+			.ops = &xfs_writeback_ops,
+		},
+	};
+
+	while (index <= end) {
+		int i, nr;
+
+		/* get a batch of DIRTY folios starting at index */
+		nr = filemap_get_folios_tag(mapping, &index, end,
+				PAGECACHE_TAG_DIRTY, fbatch);
+		if (!nr)
+			break;
+
+		for (i = 0; i < nr; i++) {
+			struct folio *folio = fbatch->folios[i];
+
+			/* Filter BEFORE locking */
+			if (!xfs_folio_matches_ag(folio, agno))
+				continue;
+
+			folio_lock(folio);
+
+			/*
+			 * Now it's ours: clear dirty and submit.
+			 * This prevents *this AG worker* from seeing it again
+			 * next time.
+			 */
+			if (!folio_clear_dirty_for_io(folio)) {
+				folio_unlock(folio);
+				continue;
+			}
+			xa_erase(&ip->i_ag_pmap, folio->index);
+
+			ret = iomap_writeback_folio(&wpc.ctx, folio);
+			folio_unlock(folio);
+
+			if (ret) {
+				folio_batch_release(fbatch);
+				goto out;
+			}
+		}
+
+		folio_batch_release(fbatch);
+		cond_resched();
+	}
+
+out:
+	if (wpc.ctx.wb_ctx && wpc.ctx.ops && wpc.ctx.ops->writeback_submit)
+		wpc.ctx.ops->writeback_submit(&wpc.ctx, ret);
+
+	return ret;
+}
+
+static void xfs_ag_writeback_work(struct work_struct *work)
+{
+	struct xfs_ag_wb *awb = container_of(to_delayed_work(work),
+			struct xfs_ag_wb, ag_work);
+	struct xfs_ag_wb_task *task;
+	struct xfs_mount *mp;
+	struct inode *inode;
+	struct xfs_inode *ip;
+	int ret;
+
+	for (;;) {
+		spin_lock(&awb->lock);
+		task = list_first_entry_or_null(&awb->task_list,
+				struct xfs_ag_wb_task, list);
+		if (task)
+			list_del_init(&task->list);
+		spin_unlock(&awb->lock);
+
+		if (!task)
+			break;
+
+		ip = task->ip;
+		mp = ip->i_mount;
+		inode = VFS_I(ip);
+
+		ret = xfs_writepages_ag(ip, &task->wbc, task->agno);
+
+		/* If we didn't submit everything for this AG, set its bit again */
+		if (ret)
+			set_bit(task->agno, ip->i_ag_dirty_bitmap);
+
+		iput(inode);	/* drop igrab */
+		mempool_free(task, mp->m_ag_task_pool);
+	}
+}
+
+static int xfs_vm_writepages_offload(struct address_space *mapping,
+		struct writeback_control *wbc)
+{
+	struct inode *inode = mapping->host;
+	struct xfs_inode *ip = XFS_I(inode);
+	struct xfs_mount *mp = ip->i_mount;
+	struct xfs_ag_wb *awb;
+	struct xfs_ag_wb_task *task;
+	xfs_agnumber_t agno;
+
+	if (!ip->i_ag_dirty_bits)
+		return 0;
+
+	for_each_set_bit(agno, ip->i_ag_dirty_bitmap, ip->i_ag_dirty_bits) {
+		if (!test_and_clear_bit(agno, ip->i_ag_dirty_bitmap))
+			continue;
+
+		task = mempool_alloc(mp->m_ag_task_pool, GFP_NOFS);
+		if (!task) {
+			set_bit(agno, ip->i_ag_dirty_bitmap);
+			continue;
+		}
+
+		INIT_LIST_HEAD(&task->list);
+		task->ip = ip;
+		task->agno = agno;
+		task->wbc = *wbc;
+		igrab(inode);	/* worker owns inode ref */
+
+		awb = &mp->m_ag_wb[agno];
+
+		spin_lock(&awb->lock);
+		list_add_tail(&task->list, &awb->task_list);
+		spin_unlock(&awb->lock);
+
+		mod_delayed_work(mp->m_ag_wq, &awb->ag_work, 0);
+	}
+
+	return 0;
+}
+
 static const struct iomap_writeback_ops xfs_zoned_writeback_ops = {
 	.writeback_range	= xfs_zoned_writeback_range,
 	.writeback_submit	= xfs_zoned_writeback_submit,
@@ -706,6 +880,7 @@ xfs_init_ag_writeback(struct xfs_mount *mp)
 	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
 		struct xfs_ag_wb *awb = &mp->m_ag_wb[agno];
 
+		INIT_DELAYED_WORK(&awb->ag_work, xfs_ag_writeback_work);
 		spin_lock_init(&awb->lock);
 		INIT_LIST_HEAD(&awb->task_list);
 		awb->agno = agno;
@@ -769,6 +944,9 @@ xfs_vm_writepages(
 		xfs_open_zone_put(xc.open_zone);
 		return error;
 	} else {
+		if (wbc->sync_mode != WB_SYNC_ALL)
+			return xfs_vm_writepages_offload(mapping, wbc);
+
 		struct xfs_writepage_ctx wpc = {
 			.ctx = {
 				.inode = mapping->host,
-- 
2.25.1
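
The per-AG writeback infrastructure used above (struct xfs_ag_wb, struct
xfs_ag_wb_task, ip->i_ag_pmap, ip->i_ag_dirty_bitmap/i_ag_dirty_bits,
mp->m_ag_wb/m_ag_wq/m_ag_task_pool, and the xfs_agp_valid()/xfs_agp_agno()
helpers) is not defined in this patch; it is assumed to be introduced by
earlier patches in the series. What follows is only a minimal sketch,
reconstructed from how those names are used here, of what this patch appears
to expect, not the actual definitions:

/* Per-AG writeback state, one instance per allocation group on the mount. */
struct xfs_ag_wb {
	xfs_agnumber_t		agno;		/* AG this worker serves */
	spinlock_t		lock;		/* protects task_list */
	struct list_head	task_list;	/* queued xfs_ag_wb_task entries */
	struct delayed_work	ag_work;	/* runs xfs_ag_writeback_work() */
};

/* One queued writeback request for a single (inode, AG) pair. */
struct xfs_ag_wb_task {
	struct list_head	list;		/* entry on xfs_ag_wb::task_list */
	struct xfs_inode	*ip;		/* inode pinned via igrab() */
	xfs_agnumber_t		agno;		/* AG whose folios to write back */
	struct writeback_control wbc;		/* copy of the caller's wbc */
};

/*
 * Fields this patch expects to have been added earlier in the series:
 *
 * struct xfs_mount:
 *	struct xfs_ag_wb	*m_ag_wb;	  per-AG array of the above
 *	struct workqueue_struct	*m_ag_wq;	  workqueue servicing ag_work
 *	mempool_t		*m_ag_task_pool;  backs xfs_ag_wb_task allocations
 *
 * struct xfs_inode:
 *	struct xarray		i_ag_pmap;	  folio index -> packed AG value,
 *						  decoded by xfs_agp_valid() and
 *						  xfs_agp_agno()
 *	unsigned long		*i_ag_dirty_bitmap;  AGs with dirty folios
 *	unsigned int		i_ag_dirty_bits;  size of that bitmap, in bits
 */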