demand_paging: Least Recently Used (LRU) eviction algorithm

This is an O(1) LRU eviction algorithm. It is a bit more complex but far more scalable than the NRU algorithm. Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
2024-06-04 18:00:40 -04:00 · 2024-06-04 18:00:40 -04:00 · 92bc0cd0da
commit 92bc0cd0da
parent 6a3aa3b04e
3 changed files with 179 additions and 0 deletions
--- a/subsys/demand_paging/eviction/CMakeLists.txt
+++ b/subsys/demand_paging/eviction/CMakeLists.txt
@ -11,4 +11,5 @@ include_directories(
 if(NOT DEFINED CONFIG_EVICTION_CUSTOM)
  zephyr_library()
  zephyr_library_sources_ifdef(CONFIG_EVICTION_NRU            nru.c)
+  zephyr_library_sources_ifdef(CONFIG_EVICTION_LRU            lru.c)
 endif()
--- a/subsys/demand_paging/eviction/Kconfig
+++ b/subsys/demand_paging/eviction/Kconfig
@ -27,6 +27,16 @@ config EVICTION_NRU
 	   - not recently accessed, dirty
 	   - not recently accessed, clean

+config EVICTION_LRU
+	bool "Least Recently Used (LRU) page eviction algorithm"
+	help
+	  This implements a Least Recently Used page eviction algorithm.
+	  Usage is tracked by having the MMU make pages inaccessible so that
+	  they fault when actually used; such faults are then used to reorder
+	  the page eviction queue. This is more efficient than the NRU
+	  algorithm: all operations are O(1), the accessed flag is cleared on
+	  one page at a time and only when there is a page eviction request.
+
 endchoice

 if EVICTION_NRU
--- a/subsys/demand_paging/eviction/lru.c
+++ b/subsys/demand_paging/eviction/lru.c
@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2024 BayLibre SAS
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Least Recently Used (LRU) eviction algorithm for demand paging.
+ *
+ * This is meant to be used with MMUs that need manual tracking of their
+ * "accessed" page flag so this can be called at the same time.
+ *
+ * Theory of Operation:
+ *
+ * - Page frames made evictable are appended to the end of the LRU queue with
+ *   k_mem_paging_eviction_add(). They are presumably made inaccessible in
+ *   their corresponding MMU page table initially, but it is not a problem
+ *   if they are not.
+ *
+ * - When accessed, an inaccessible page causes a fault. The architecture
+ *   fault handler makes the page accessible, marks it as accessed and calls
+ *   k_mem_paging_eviction_accessed() which moves the corresponding page frame
+ *   back to the end of the queue.
+ *
+ * - On page reclamation, the page at the head of the queue is removed for
+ *   that purpose. The new head page is marked inaccessible.
+ *
+ * - If the new head page is actively used, it will cause a fault and be moved
+ *   to the end of the queue, preventing it from being the next page
+ *   reclamation victim. Then the new head page is made inaccessible.
+ *
+ * This way, unused pages will migrate toward the head of the queue, while
+ * used pages will tend to remain toward the end of the queue. There won't be
+ * any fault overhead while the set of accessed pages remains stable.
+ * This algorithm's complexity is O(1).
+ */
+
+#include <zephyr/kernel.h>
+#include <zephyr/kernel/mm/demand_paging.h>
+#include <zephyr/spinlock.h>
+#include <mmu.h>
+#include <kernel_arch_interface.h>
+
+/*
+ * Page frames are ordered according to their access pattern. Using a regular
+ * doubly-linked list with actual pointers would be wasteful as all we need
+ * is a previous PF index and a next PF index for each page frame number
+ * which can be compactly represented in an array.
+ */
+
+/*
+ * Number of bits needed to store a page frame index. Rounded up to a byte
+ * boundary for best compromise between code performance and space saving.
+ * This value is used as the bit-field width of the next/prev members of
+ * struct lru_pf_idx.
+ */
+#define PF_IDX_BITS ROUND_UP(LOG2CEIL(K_MEM_NUM_PAGE_FRAMES), 8)
+
+/*
+ * For each page frame, track the previous and next page frame in the queue.
+ * Both links are page frame indexes rather than pointers; the value 0
+ * designates slot 0 of the array, which holds the queue head and tail.
+ */
+struct lru_pf_idx {
+	uint32_t next : PF_IDX_BITS; /* following page frame index, 0 if last */
+	uint32_t prev : PF_IDX_BITS; /* preceding page frame index, 0 if first */
+} __packed;
+
+static struct lru_pf_idx lru_pf_queue[K_MEM_NUM_PAGE_FRAMES]; /* link entries, indexed by page frame number */
+static struct k_spinlock lru_lock; /* held while the queue is modified */
+
+/*
+ * Slot 0 is for head and tail indexes (assuming actual PF #0 won't be used).
+ * Its "next" member is the queue head and its "prev" member is the queue
+ * tail. A head index of 0 means the queue is empty.
+ */
+#define LRU_PF_HEAD lru_pf_queue[0].next
+#define LRU_PF_TAIL lru_pf_queue[0].prev
+
+/*
+ * Link page frame index pf_idx at the tail of the LRU queue.
+ *
+ * When the queue is empty the tail index is 0, so the write to the former
+ * tail's "next" member lands in slot 0 and thereby also sets the queue head.
+ */
+static inline void lru_pf_append(uint32_t pf_idx)
+{
+	uint32_t old_tail = LRU_PF_TAIL;
+	struct lru_pf_idx *entry = &lru_pf_queue[pf_idx];
+
+	/* the new entry sits after the former tail and has no successor */
+	entry->next = 0;
+	entry->prev = old_tail;
+
+	/* hook it up and make it the new tail */
+	lru_pf_queue[old_tail].next = pf_idx;
+	LRU_PF_TAIL = pf_idx;
+}
+
+/*
+ * Detach page frame index pf_idx from the LRU queue by joining its two
+ * neighbours together, then reset its own links to 0.
+ *
+ * Slot 0 acts as the neighbour at either end of the queue, so the queue
+ * head and tail get updated by the same two assignments.
+ */
+static inline void lru_pf_unlink(uint32_t pf_idx)
+{
+	struct lru_pf_idx *entry = &lru_pf_queue[pf_idx];
+	uint32_t before = entry->prev;
+	uint32_t after = entry->next;
+
+	/* bypass the entry in both directions */
+	lru_pf_queue[after].prev = before;
+	lru_pf_queue[before].next = after;
+
+	/* cleared links are what lru_pf_in_queue() looks at */
+	entry->prev = 0;
+	entry->next = 0;
+}
+
+/*
+ * Tell whether page frame index pf_idx is currently linked in the LRU queue.
+ *
+ * A queued entry has at least one non-zero link, except when it is the only
+ * entry in the queue: both links are then 0 but the queue head designates it.
+ */
+static inline bool lru_pf_in_queue(uint32_t pf_idx)
+{
+	return (lru_pf_queue[pf_idx].next != 0) ||
+	       (lru_pf_queue[pf_idx].prev != 0) ||
+	       (LRU_PF_HEAD == pf_idx);
+}
+
+/*
+ * Take page frame index pf_idx out of the LRU queue.
+ *
+ * If that page frame was the queue head and another one takes its place,
+ * arch_page_info_get() is invoked on the new head with its last argument
+ * set to true in order to clear its accessed flag, making it inaccessible
+ * so that its next use causes a fault.
+ */
+static void lru_pf_remove(uint32_t pf_idx)
+{
+	/* must be sampled before unlinking changes the head */
+	bool removing_head = (LRU_PF_HEAD == pf_idx);
+
+	lru_pf_unlink(pf_idx);
+
+	if (!removing_head || (LRU_PF_HEAD == 0)) {
+		/* head is unchanged, or the queue is now empty */
+		return;
+	}
+
+	/* make new head PF inaccessible */
+	struct k_mem_page_frame *new_head = &k_mem_page_frames[LRU_PF_HEAD];
+	uintptr_t flags = arch_page_info_get(k_mem_page_frame_to_virt(new_head), NULL, true);
+
+	/* clearing the accessed flag expected only on loaded pages */
+	__ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "");
+	ARG_UNUSED(flags);
+}
+
+/*
+ * Append page frame pf to the end of the LRU queue.
+ *
+ * The page frame is expected to be evictable, to not be page frame 0 and
+ * to not already be queued; those expectations are only checked with
+ * assertions. The queue update is done with lru_lock held.
+ */
+void k_mem_paging_eviction_add(struct k_mem_page_frame *pf)
+{
+	uint32_t pf_idx;
+	k_spinlock_key_t key;
+
+	pf_idx = pf - k_mem_page_frames;
+	key = k_spin_lock(&lru_lock);
+
+	__ASSERT(k_mem_page_frame_is_evictable(pf), "");
+	__ASSERT(pf_idx != 0, "page frame 0 not expected to be used here");
+	__ASSERT(!lru_pf_in_queue(pf_idx), "");
+
+	lru_pf_append(pf_idx);
+
+	k_spin_unlock(&lru_lock, key);
+}
+
+/*
+ * Remove page frame pf from the LRU queue.
+ *
+ * The page frame is expected to not be page frame 0 and to currently be
+ * queued; those expectations are only checked with assertions. The queue
+ * update is done with lru_lock held.
+ */
+void k_mem_paging_eviction_remove(struct k_mem_page_frame *pf)
+{
+	uint32_t pf_idx;
+	k_spinlock_key_t key;
+
+	pf_idx = pf - k_mem_page_frames;
+	key = k_spin_lock(&lru_lock);
+
+	__ASSERT(pf_idx != 0, "page frame 0 not expected to be used here");
+	__ASSERT(lru_pf_in_queue(pf_idx), "");
+
+	lru_pf_remove(pf_idx);
+
+	k_spin_unlock(&lru_lock, key);
+}
+
+/*
+ * Note that the page frame corresponding to physical address phys has been
+ * accessed: if it is in the LRU queue, move it back to the end of the queue.
+ *
+ * Page frame 0 and page frames that are not queued are left alone. The
+ * queue is inspected and updated with lru_lock held.
+ */
+void k_mem_paging_eviction_accessed(uintptr_t phys)
+{
+	uint32_t idx = k_mem_phys_to_page_frame(phys) - k_mem_page_frames;
+	k_spinlock_key_t lock_key = k_spin_lock(&lru_lock);
+	bool queued = (idx != 0) && lru_pf_in_queue(idx);
+
+	if (queued) {
+		/* requeue at the tail, i.e. as the most recently used */
+		lru_pf_remove(idx);
+		lru_pf_append(idx);
+	}
+
+	k_spin_unlock(&lru_lock, lock_key);
+}
+
+/*
+ * Pick the page frame to evict next, which is the one at the head of the
+ * LRU queue. The queue itself is not modified here.
+ *
+ * On success, *dirty_ptr is set according to whether ARCH_DATA_PAGE_DIRTY
+ * is present in the flags reported by arch_page_info_get() for that page.
+ *
+ * Returns the selected page frame, or NULL if the queue is empty (in which
+ * case *dirty_ptr is left untouched).
+ */
+struct k_mem_page_frame *k_mem_paging_eviction_select(bool *dirty_ptr)
+{
+	uint32_t victim_idx = LRU_PF_HEAD;
+	struct k_mem_page_frame *pf;
+	uintptr_t page_flags;
+
+	if (victim_idx == 0) {
+		/* a head index of 0 means nothing is queued */
+		return NULL;
+	}
+
+	pf = &k_mem_page_frames[victim_idx];
+	page_flags = arch_page_info_get(k_mem_page_frame_to_virt(pf), NULL, false);
+
+	__ASSERT(k_mem_page_frame_is_evictable(pf), "");
+	*dirty_ptr = (page_flags & ARCH_DATA_PAGE_DIRTY) ? true : false;
+
+	return pf;
+}
+
+/*
+ * Initialization hook for this eviction algorithm: intentionally a no-op.
+ */
+void k_mem_paging_eviction_init(void)
+{
+	/*
+	 * Nothing to set up: lru_pf_queue[] has static storage duration and
+	 * is therefore zero-initialized, which leaves the head and tail
+	 * indexes at 0, i.e. an empty queue.
+	 */
+}