[PATCH] mm/page_alloc: avoid deadlocks for &pagesets.lock

Desmond Cheong Zhi Xi desmondcheongzx at gmail.com
Wed Jul 7 11:12:45 UTC 2021


Syzbot reports a number of potential deadlocks for &pagesets.lock. It
seems that this new lock is being used as both an inner and outer
lock, which makes it prone to creating circular dependencies.

For example, one such call trace goes as follows:
  __alloc_pages_bulk()
    local_lock_irqsave(&pagesets.lock, flags) <---- outer lock here
    prep_new_page():
      post_alloc_hook():
        set_page_owner():
          __set_page_owner():
            save_stack():
              stack_depot_save():
                alloc_pages():
                  alloc_page_interleave():
                    __alloc_pages():
                      get_page_from_freelist():
                        rm_queue():
                          rm_queue_pcplist():
                            local_lock_irqsave(&pagesets.lock, flags);
                            *** DEADLOCK ***

The common culprit for the lockdep splats seems to be the call to
local_lock_irqsave(&pagesets.lock, flags) inside
__alloc_pages_bulk(). &pagesets.lock becomes an outer lock if it's
held during the call to prep_new_page().

As the local lock is used to protect the PCP structure, we adjust the
locking in __alloc_pages_bulk so that only the necessary structures
are protected.

Fixes: dbbee9d5cd83 ("mm/page_alloc: convert per-cpu list protection to local_lock")
Reported-and-tested-by: syzbot+127fd7828d6eeb611703 at syzkaller.appspotmail.com
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx at gmail.com>
---
 mm/page_alloc.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0817d88383d5..0e005b1a60e3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5288,10 +5288,6 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 		goto failed;
 
 	/* Attempt the batch allocation */
-	local_lock_irqsave(&pagesets.lock, flags);
-	pcp = this_cpu_ptr(zone->per_cpu_pageset);
-	pcp_list = &pcp->lists[order_to_pindex(ac.migratetype, 0)];
-
 	while (nr_populated < nr_pages) {
 
 		/* Skip existing pages */
@@ -5300,12 +5296,16 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 			continue;
 		}
 
+		local_lock_irqsave(&pagesets.lock, flags);
+		pcp = this_cpu_ptr(zone->per_cpu_pageset);
+		pcp_list = &pcp->lists[order_to_pindex(ac.migratetype, 0)];
 		page = __rmqueue_pcplist(zone, 0, ac.migratetype, alloc_flags,
 								pcp, pcp_list);
+		local_unlock_irqrestore(&pagesets.lock, flags);
 		if (unlikely(!page)) {
 			/* Try and get at least one page */
 			if (!nr_populated)
-				goto failed_irq;
+				goto failed;
 			break;
 		}
 		nr_account++;
@@ -5318,16 +5318,11 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 		nr_populated++;
 	}
 
-	local_unlock_irqrestore(&pagesets.lock, flags);
-
 	__count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
 	zone_statistics(ac.preferred_zoneref->zone, zone, nr_account);
 
 	return nr_populated;
 
-failed_irq:
-	local_unlock_irqrestore(&pagesets.lock, flags);
-
 failed:
 	page = __alloc_pages(gfp, 0, preferred_nid, nodemask);
 	if (page) {
-- 
2.25.1



More information about the Linux-kernel-mentees mailing list