Now that we have moved the swap count state to the virtual swap layer, each swap map entry only has 3 possible states: free, allocated, and bad. Replace the swap map with 2 bitmaps (one for allocated state and one for bad state), saving 6 bits per swap entry. Signed-off-by: Nhat Pham --- include/linux/swap.h | 3 +- mm/swapfile.c | 81 +++++++++++++++++++++++--------------------- 2 files changed, 44 insertions(+), 40 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 21e528d8d3480..3c789149996c5 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -259,7 +259,8 @@ struct swap_info_struct { struct plist_node list; /* entry in swap_active_head */ signed char type; /* strange name for an index */ unsigned int max; /* extent of the swap_map */ - unsigned char *swap_map; /* vmalloc'ed array of usage counts */ + unsigned long *swap_map; /* bitmap for allocated state */ + unsigned long *bad_map; /* bitmap for bad state */ struct swap_cluster_info *cluster_info; /* cluster info. 
Only for SSD */ struct list_head free_clusters; /* free clusters list */ struct list_head full_clusters; /* full clusters list */ diff --git a/mm/swapfile.c b/mm/swapfile.c index b553652125d11..3e2bfcf1aa789 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -760,25 +760,19 @@ static bool cluster_reclaim_range(struct swap_info_struct *si, struct swap_cluster_info *ci, unsigned long start, unsigned long end) { - unsigned char *map = si->swap_map; unsigned long offset = start; int nr_reclaim; spin_unlock(&ci->lock); do { - switch (READ_ONCE(map[offset])) { - case 0: + if (!test_bit(offset, si->swap_map)) { offset++; - break; - case SWAP_MAP_ALLOCATED: + } else { nr_reclaim = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY); if (nr_reclaim > 0) offset += nr_reclaim; else goto out; - break; - default: - goto out; } } while (offset < end); out: @@ -787,11 +781,7 @@ static bool cluster_reclaim_range(struct swap_info_struct *si, * Recheck the range no matter reclaim succeeded or not, the slot * could have been be freed while we are not holding the lock. 
*/ - for (offset = start; offset < end; offset++) - if (READ_ONCE(map[offset])) - return false; - - return true; + return find_next_bit(si->swap_map, end, start) >= end; } static bool cluster_scan_range(struct swap_info_struct *si, @@ -800,15 +790,16 @@ static bool cluster_scan_range(struct swap_info_struct *si, bool *need_reclaim) { unsigned long offset, end = start + nr_pages; - unsigned char *map = si->swap_map; - unsigned char count; if (cluster_is_empty(ci)) return true; for (offset = start; offset < end; offset++) { - count = READ_ONCE(map[offset]); - if (!count) + /* Bad slots cannot be used for allocation */ + if (test_bit(offset, si->bad_map)) + return false; + + if (!test_bit(offset, si->swap_map)) continue; if (swap_cache_only(si, offset)) { @@ -841,7 +832,7 @@ static bool cluster_alloc_range(struct swap_info_struct *si, struct swap_cluster if (cluster_is_empty(ci)) ci->order = order; - memset(si->swap_map + start, usage, nr_pages); + bitmap_set(si->swap_map, start, nr_pages); swap_range_alloc(si, nr_pages); ci->count += nr_pages; @@ -1407,7 +1398,7 @@ static struct swap_info_struct *_swap_info_get(swp_slot_t slot) offset = swp_slot_offset(slot); if (offset >= si->max) goto bad_offset; - if (data_race(!si->swap_map[swp_slot_offset(slot)])) + if (data_race(!test_bit(offset, si->swap_map))) goto bad_free; return si; @@ -1521,8 +1512,7 @@ static void swap_slots_free(struct swap_info_struct *si, swp_slot_t slot, unsigned int nr_pages) { unsigned long offset = swp_slot_offset(slot); - unsigned char *map = si->swap_map + offset; - unsigned char *map_end = map + nr_pages; + unsigned long end = offset + nr_pages; /* It should never free entries across different clusters */ VM_BUG_ON(ci != __swap_offset_to_cluster(si, offset + nr_pages - 1)); @@ -1530,10 +1520,8 @@ static void swap_slots_free(struct swap_info_struct *si, VM_BUG_ON(ci->count < nr_pages); ci->count -= nr_pages; - do { - VM_BUG_ON(!swap_is_last_ref(*map)); - *map = 0; - } while (++map < map_end); + 
VM_BUG_ON(find_next_zero_bit(si->swap_map, end, offset) < end); + bitmap_clear(si->swap_map, offset, nr_pages); swap_range_free(si, offset, nr_pages); @@ -1744,9 +1732,7 @@ unsigned int count_swap_pages(int type, int free) static bool swap_slot_allocated(struct swap_info_struct *si, unsigned long offset) { - unsigned char count = READ_ONCE(si->swap_map[offset]); - - return count && swap_count(count) != SWAP_MAP_BAD; + return test_bit(offset, si->swap_map); } /* @@ -2067,7 +2053,7 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span) } static void setup_swap_info(struct swap_info_struct *si, int prio, - unsigned char *swap_map, + unsigned long *swap_map, struct swap_cluster_info *cluster_info) { si->prio = prio; @@ -2095,7 +2081,7 @@ static void _enable_swap_info(struct swap_info_struct *si) } static void enable_swap_info(struct swap_info_struct *si, int prio, - unsigned char *swap_map, + unsigned long *swap_map, struct swap_cluster_info *cluster_info) { spin_lock(&swap_lock); @@ -2188,7 +2174,8 @@ static void flush_percpu_swap_cluster(struct swap_info_struct *si) SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) { struct swap_info_struct *p = NULL; - unsigned char *swap_map; + unsigned long *swap_map; + unsigned long *bad_map; struct swap_cluster_info *cluster_info; struct file *swap_file, *victim; struct address_space *mapping; @@ -2283,6 +2270,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) p->swap_file = NULL; swap_map = p->swap_map; p->swap_map = NULL; + bad_map = p->bad_map; + p->bad_map = NULL; maxpages = p->max; cluster_info = p->cluster_info; p->max = 0; @@ -2293,7 +2282,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) mutex_unlock(&swapon_mutex); kfree(p->global_cluster); p->global_cluster = NULL; - vfree(swap_map); + kvfree(swap_map); + kvfree(bad_map); free_cluster_info(cluster_info, maxpages); inode = mapping->host; @@ -2641,18 +2631,20 @@ static unsigned long read_swap_header(struct 
swap_info_struct *si, static int setup_swap_map(struct swap_info_struct *si, union swap_header *swap_header, - unsigned char *swap_map, + unsigned long *swap_map, + unsigned long *bad_map, unsigned long maxpages) { unsigned long i; - swap_map[0] = SWAP_MAP_BAD; /* omit header page */ + set_bit(0, bad_map); /* omit header page */ + for (i = 0; i < swap_header->info.nr_badpages; i++) { unsigned int page_nr = swap_header->info.badpages[i]; if (page_nr == 0 || page_nr > swap_header->info.last_page) return -EINVAL; if (page_nr < maxpages) { - swap_map[page_nr] = SWAP_MAP_BAD; + set_bit(page_nr, bad_map); si->pages--; } } @@ -2756,7 +2748,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) int nr_extents; sector_t span; unsigned long maxpages; - unsigned char *swap_map = NULL; + unsigned long *swap_map = NULL, *bad_map = NULL; struct swap_cluster_info *cluster_info = NULL; struct folio *folio = NULL; struct inode *inode = NULL; @@ -2852,16 +2844,24 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) maxpages = si->max; /* OK, set up the swap map and apply the bad block list */ - swap_map = vzalloc(maxpages); + swap_map = kvcalloc(BITS_TO_LONGS(maxpages), sizeof(long), GFP_KERNEL); if (!swap_map) { error = -ENOMEM; goto bad_swap_unlock_inode; } - error = setup_swap_map(si, swap_header, swap_map, maxpages); + bad_map = kvcalloc(BITS_TO_LONGS(maxpages), sizeof(long), GFP_KERNEL); + if (!bad_map) { + error = -ENOMEM; + goto bad_swap_unlock_inode; + } + + error = setup_swap_map(si, swap_header, swap_map, bad_map, maxpages); if (error) goto bad_swap_unlock_inode; + si->bad_map = bad_map; + if (si->bdev && bdev_stable_writes(si->bdev)) si->flags |= SWP_STABLE_WRITES; @@ -2955,7 +2955,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) si->swap_file = NULL; si->flags = 0; spin_unlock(&swap_lock); - vfree(swap_map); + if (swap_map) + kvfree(swap_map); + if (bad_map) + kvfree(bad_map); if 
(cluster_info) free_cluster_info(cluster_info, maxpages); if (inced_nr_rotate_swap) -- 2.52.0