From: Hiroyuki KAMEZAWA <kamezawa.hiroyu@jp.fujitsu.com>

This patch removes bitmaps from page allocator in mm/page_alloc.c.

This buddy system uses the page->private field to record a free page's order
instead of using bitmaps.

The algorithm of the buddy system is unchanged. Only bitmaps are removed.

In this buddy system, two pages, a page and its "buddy", can be coalesced when

PagePrivate(buddy) &&
page_order(page) == page_order(buddy) &&
!PageReserved(buddy) &&
page_count(buddy) == 0

This also means "buddy" is the head of a contiguous run of free pages
of length (1 << page_order(buddy)).
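
For illustration, the index arithmetic that the reworked __free_pages_bulk()
relies on can be sketched as below (the helper names are hypothetical; the
real code open-codes the same expressions):

/*
 * Illustration only -- not part of the patch.  "page_idx" is the offset
 * of a page from zone->zone_mem_map.
 */
static inline unsigned long buddy_index(unsigned long page_idx, unsigned int order)
{
	/* the buddy of a 2^order block differs from it only in bit "order" */
	return page_idx ^ (1UL << order);
}

static inline unsigned long combined_index(unsigned long page_idx, unsigned int order)
{
	/* the merged 2^(order+1) block starts at the lower of the two indices */
	return page_idx & buddy_index(page_idx, order);
}

For example, page_idx = 12 at order = 2 gives buddy_index() == 8 and
combined_index() == 8, i.e. the merged order-3 block starts at index 8.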

bad_range() is called from the inner loop of __free_pages_bulk().
On many architectures, bad_range() is only a sanity check and always returns 0.
But if a zone's memmap has a hole, it sometimes returns 1.
An architecture with memory holes inside a zone has to define CONFIG_HOLES_IN_ZONE.
When CONFIG_HOLES_IN_ZONE is defined, pfn_valid() is called to check
whether a buddy page is valid or not.
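
With the change applied, bad_range() effectively reads as follows (a sketch
reconstructed from the first hunk's context lines; the earlier end-of-zone
check is elided here):

static int bad_range(struct zone *zone, struct page *page)
{
	/* ... end-of-zone check unchanged ... */
	if (page_to_pfn(page) < zone->zone_start_pfn)
		return 1;
#ifdef CONFIG_HOLES_IN_ZONE
	/* the memmap may have holes; reject buddies whose pfn is not backed */
	if (!pfn_valid(page_to_pfn(page)))
		return 1;
#endif
	if (zone != page_zone(page))
		return 1;
	return 0;
}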

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/mm/page_alloc.c |  163 +++++++++++++++++++++++-------------------------
 1 files changed, 81 insertions(+), 82 deletions(-)

diff -puN mm/page_alloc.c~no-buddy-bitmap-patch-revisit-for-mm-page_allocc mm/page_alloc.c
--- 25/mm/page_alloc.c~no-buddy-bitmap-patch-revisit-for-mm-page_allocc	2004-11-10 20:44:10.224306552 -0800
+++ 25-akpm/mm/page_alloc.c	2004-11-10 20:44:10.230305640 -0800
@@ -71,6 +71,10 @@ static int bad_range(struct zone *zone, 
 		return 1;
 	if (page_to_pfn(page) < zone->zone_start_pfn)
 		return 1;
+#ifdef CONFIG_HOLES_IN_ZONE
+	if (!pfn_valid(page_to_pfn(page)))
+		return 1;
+#endif
 	if (zone != page_zone(page))
 		return 1;
 	return 0;
@@ -159,6 +163,45 @@ static void destroy_compound_page(struct
 #endif		/* CONFIG_HUGETLB_PAGE */
 
 /*
+ * function for dealing with page's order in buddy system.
+ * zone->lock is already acquired when we use these.
+ * So, we don't need atomic page->flags operations here.
+ */
+static inline unsigned long page_order(struct page *page) {
+	return page->private;
+}
+
+static inline void set_page_order(struct page *page, int order) {
+	page->private = order;
+	__SetPagePrivate(page);
+}
+
+static inline void rmv_page_order(struct page *page)
+{
+	__ClearPagePrivate(page);
+	page->private = 0;
+}
+
+/*
+ * This function checks whether a page is free && is the buddy
+ * we can do coalesce a page and its buddy if
+ * (a) the buddy is free &&
+ * (b) the buddy is on the buddy system &&
+ * (c) a page and its buddy have the same order.
+ * for recording page's order, we use page->private and PG_private.
+ *
+ */
+static inline int page_is_buddy(struct page *page, int order)
+{
+       if (PagePrivate(page)           &&
+           (page_order(page) == order) &&
+           !PageReserved(page)         &&
+            page_count(page) == 0)
+               return 1;
+       return 0;
+}
+
+/*
  * Freeing function for a buddy system allocator.
  *
  * The concept of a buddy system is to maintain direct-mapped table
@@ -170,9 +213,10 @@ static void destroy_compound_page(struct
  * at the bottom level available, and propagating the changes upward
  * as necessary, plus some accounting needed to play nicely with other
  * parts of the VM system.
- * At each level, we keep one bit for each pair of blocks, which
- * is set to 1 iff only one of the pair is allocated.  So when we
- * are allocating or freeing one, we can derive the state of the
+ * At each level, we keep a list of pages, which are heads of continuous
+ * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * order is recorded in page->private field.
+ * So when we are allocating or freeing one, we can derive the state of the
  * other.  That is, if we allocate a small block, and both were   
  * free, the remainder of the region must be split into blocks.   
  * If a block is freed, and its buddy is also free, then this
@@ -182,44 +226,43 @@ static void destroy_compound_page(struct
  */
 
 static inline void __free_pages_bulk (struct page *page, struct page *base,
-		struct zone *zone, struct free_area *area, unsigned int order)
+		struct zone *zone, unsigned int order)
 {
-	unsigned long page_idx, index, mask;
+	unsigned long page_idx;
+	struct page *coalesced;
+	int order_size = 1 << order;
 
-	if (order)
+	if (unlikely(order))
 		destroy_compound_page(page, order);
-	mask = (~0UL) << order;
+
 	page_idx = page - base;
-	if (page_idx & ~mask)
-		BUG();
-	index = page_idx >> (1 + order);
 
-	zone->free_pages += 1 << order;
-	while (order < MAX_ORDER-1) {
-		struct page *buddy1, *buddy2;
+	BUG_ON(page_idx & (order_size - 1));
+	BUG_ON(bad_range(zone, page));
 
-		BUG_ON(area >= zone->free_area + MAX_ORDER);
-		if (!__test_and_change_bit(index, area->map))
-			/*
-			 * the buddy page is still allocated.
-			 */
+	zone->free_pages += order_size;
+	while (order < MAX_ORDER-1) {
+		struct free_area *area;
+		struct page *buddy;
+		int buddy_idx;
+
+		buddy_idx = (page_idx ^ (1 << order));
+		buddy = base + buddy_idx;
+		if (bad_range(zone, buddy))
+			break;
+		if (!page_is_buddy(buddy, order))
 			break;
-
 		/* Move the buddy up one level. */
-		buddy1 = base + (page_idx ^ (1 << order));
-		buddy2 = base + page_idx;
-		BUG_ON(bad_range(zone, buddy1));
-		BUG_ON(bad_range(zone, buddy2));
-		list_del(&buddy1->lru);
+		list_del(&buddy->lru);
+		area = zone->free_area + order;
 		area->nr_free--;
-		mask <<= 1;
+		rmv_page_order(buddy);
+		page_idx &= buddy_idx;
 		order++;
-		area++;
-		index >>= 1;
-		page_idx &= mask;
 	}
-	list_add(&(base + page_idx)->lru, &area->free_list);
-	area->nr_free++;
+	coalesced = base + page_idx;
+	set_page_order(coalesced, order);
+	list_add(&coalesced->lru, &zone->free_area[order].free_list);
 }
 
 static inline void free_pages_check(const char *function, struct page *page)
@@ -257,12 +300,10 @@ free_pages_bulk(struct zone *zone, int c
 		struct list_head *list, unsigned int order)
 {
 	unsigned long flags;
-	struct free_area *area;
 	struct page *base, *page = NULL;
 	int ret = 0;
 
 	base = zone->zone_mem_map;
-	area = zone->free_area + order;
 	spin_lock_irqsave(&zone->lock, flags);
 	zone->all_unreclaimable = 0;
 	zone->pages_scanned = 0;
@@ -270,7 +311,7 @@ free_pages_bulk(struct zone *zone, int c
 		page = list_entry(list->prev, struct page, lru);
 		/* have to delete it as __free_pages_bulk list manipulates */
 		list_del(&page->lru);
-		__free_pages_bulk(page, base, zone, area, order);
+		__free_pages_bulk(page, base, zone, order);
 		ret++;
 	}
 	spin_unlock_irqrestore(&zone->lock, flags);
@@ -299,8 +340,6 @@ void __free_pages_ok(struct page *page, 
 	free_pages_bulk(page_zone(page), 1, &list, order);
 }
 
-#define MARK_USED(index, order, area) \
-	__change_bit((index) >> (1+(order)), (area)->map)
 
 /*
  * The order of subdivision here is critical for the IO subsystem.
@@ -318,7 +357,7 @@ void __free_pages_ok(struct page *page, 
  */
 static inline struct page *
 expand(struct zone *zone, struct page *page,
-	 unsigned long index, int low, int high, struct free_area *area)
+ 	int low, int high, struct free_area *area)
 {
 	unsigned long size = 1 << high;
 
@@ -329,7 +368,7 @@ expand(struct zone *zone, struct page *p
 		BUG_ON(bad_range(zone, &page[size]));
 		list_add(&page[size].lru, &area->free_list);
 		area->nr_free++;
-		MARK_USED(index + size, high, area);
+		set_page_order(&page[size], high);
 	}
 	return page;
 }
@@ -384,7 +423,6 @@ static struct page *__rmqueue(struct zon
 	struct free_area * area;
 	unsigned int current_order;
 	struct page *page;
-	unsigned int index;
 
 	for (current_order = order; current_order < MAX_ORDER; ++current_order) {
 		area = zone->free_area + current_order;
@@ -393,12 +431,10 @@ static struct page *__rmqueue(struct zon
 
 		page = list_entry(area->free_list.next, struct page, lru);
 		list_del(&page->lru);
+		rmv_page_order(page);
 		area->nr_free--;
-		index = page - zone->zone_mem_map;
-		if (current_order != MAX_ORDER-1)
-			MARK_USED(index, current_order, area);
 		zone->free_pages -= 1UL << order;
-		return expand(zone, page, index, order, current_order, area);
+		return expand(zone, page, order, current_order, area);
 	}
 
 	return NULL;
@@ -1567,49 +1603,12 @@ void __init memmap_init_zone(unsigned lo
 	}
 }
 
-/*
- * Page buddy system uses "index >> (i+1)", where "index" is
- * at most "size-1".
- *
- * The extra "+3" is to round down to byte size (8 bits per byte
- * assumption). Thus we get "(size-1) >> (i+4)" as the last byte
- * we can access.
- *
- * The "+1" is because we want to round the byte allocation up
- * rather than down. So we should have had a "+7" before we shifted
- * down by three. Also, we have to add one as we actually _use_ the
- * last bit (it's [0,n] inclusive, not [0,n[).
- *
- * So we actually had +7+1 before we shift down by 3. But
- * (n+8) >> 3 == (n >> 3) + 1 (modulo overflows, which we do not have).
- *
- * Finally, we LONG_ALIGN because all bitmap operations are on longs.
- */
-unsigned long pages_to_bitmap_size(unsigned long order, unsigned long nr_pages)
-{
-	unsigned long bitmap_size;
-
-	bitmap_size = (nr_pages-1) >> (order+4);
-	bitmap_size = LONG_ALIGN(bitmap_size+1);
-
-	return bitmap_size;
-}
-
-void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, unsigned long size)
+void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
+				unsigned long size)
 {
 	int order;
-	for (order = 0; ; order++) {
-		unsigned long bitmap_size;
-
+	for (order = 0; order < MAX_ORDER ; order++) {
 		INIT_LIST_HEAD(&zone->free_area[order].free_list);
-		if (order == MAX_ORDER-1) {
-			zone->free_area[order].map = NULL;
-			break;
-		}
-
-		bitmap_size = pages_to_bitmap_size(order, size);
-		zone->free_area[order].map =
-		  (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
 		zone->free_area[order].nr_free = 0;
 	}
 }
_