Skip to content

Commit e4ae114

Browse files
Page cache: implement page eviction for user-mapped pages
Page cache pages that are mapped by the user process and faulted in on demand are never evicted from the cache until they are unmapped; this includes all the read-only sections of the user program ELF (which are never unmapped). To allow releasing memory from these pages during low-memory or out-of-memory conditions, enhance the memory cleaner implementation so that it scans both shared and private mappings and evicts "old" pages, i.e. pages that have not been accessed recently (in OOM conditions, be more aggressive by evicting even recently accessed pages). To do the eviction safely on SMP machines, invoke a synchronous (rendezvous-based) TLB shootdown. This optimizes memory utilization by the page cache, e.g. it allows more memory to be released to the host OS via a virtio balloon device. With this change, a SIGBUS signal can be delivered to the user process during the "ruby_alloc" end-to-end test when a page cannot be faulted in; the Ruby program handles this signal by dumping process state information and then aborting (i.e. raising a SIGABRT signal). Therefore, to make this test pass, add "6" (the SIGABRT signal number) to the list of expected exit codes for this test.
1 parent 130da34 commit e4ae114

File tree

9 files changed

+155
-10
lines changed

9 files changed

+155
-10
lines changed

src/aarch64/page_machine.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,11 +407,23 @@ static inline boolean pte_is_dirty(pte entry)
407407
return false;
408408
}
409409

410+
static inline boolean pte_is_accessed(pte entry)
411+
{
412+
return (entry & PAGE_ATTR_AF) != 0;
413+
}
414+
410415
static inline void pt_pte_clean(pteptr pte)
411416
{
412417
// XXX TODO
413418
}
414419

420+
static inline boolean pte_clear_accessed(pteptr pp)
421+
{
422+
boolean accessed = !!(*pp & PAGE_ATTR_AF);
423+
*pp &= ~PAGE_ATTR_AF;
424+
return accessed;
425+
}
426+
415427
static inline u64 page_from_pte(pte pte)
416428
{
417429
return pte & PAGE_4K_NEXT_TABLE_OR_PAGE_OUT_MASK;

src/kernel/init.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -688,10 +688,10 @@ closure_function(2, 3, void, attach_storage,
688688
apply(req_handler, &req);
689689
}
690690

691-
closure_func_basic(mem_cleaner, u64, mm_pagecache_cleaner,
692-
u64 clean_bytes)
691+
closure_func_basic(mem_wcleaner, u64, mm_pagecache_cleaner,
692+
u64 clean_bytes, u32 flags)
693693
{
694-
return pagecache_drain(clean_bytes);
694+
return pagecache_drain(clean_bytes, flags);
695695
}
696696

697697
static void read_kernel_syms(void)
@@ -731,9 +731,9 @@ void kernel_runtime_init(kernel_heaps kh)
731731
init_sg(locked);
732732
dma_init(kh);
733733
init_pagecache(locked, (heap)kh->pages, PAGESIZE);
734-
mem_cleaner pc_cleaner = closure_func(misc, mem_cleaner, mm_pagecache_cleaner);
734+
mem_wcleaner pc_cleaner = closure_func(misc, mem_wcleaner, mm_pagecache_cleaner);
735735
assert(pc_cleaner != INVALID_ADDRESS);
736-
assert(mm_register_mem_cleaner(pc_cleaner));
736+
assert(mm_register_mem_wcleaner(pc_cleaner));
737737
init_extra_prints();
738738
init_pci(kh);
739739
init_console(kh);

src/kernel/pagecache.c

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ typedef struct pagecache_page_entry {
3232
pagecache_page pp;
3333
} *pagecache_page_entry;
3434

35+
typedef struct pagecache_drain_work {
36+
buffer page_entries;
37+
closure_struct(thunk, inval_complete);
38+
int freed_pages;
39+
} *pagecache_drain_work;
40+
3541
#define pagecache_lock_mappings() pagecache_lock(global_pagecache)
3642
#define pagecache_unlock_mappings() pagecache_unlock(global_pagecache)
3743

@@ -808,7 +814,76 @@ static void pagecache_delete_pages_locked(pagecache pc)
808814
}
809815
}
810816

811-
u64 pagecache_drain(u64 drain_bytes)
817+
closure_function(3, 3, boolean, pagecache_check_old_page,
818+
pagecache_map, pcm, flush_entry, fe, buffer, page_entries,
819+
int level, u64 vaddr, pteptr entry)
820+
{
821+
pagecache pc = global_pagecache;
822+
pagecache_map pcm = bound(pcm);
823+
pte old_entry = pte_from_pteptr(entry);
824+
boolean abort = false;
825+
if (pte_is_present(old_entry) && pte_is_mapping(level, old_entry) && !pte_is_dirty(old_entry)) {
826+
u64 pi = (pcm->node_offset + vaddr - pcm->n.r.start) >> pc->page_order;
827+
pagecache_node pn = pcm->pn;
828+
pagecache_lock_node(pn);
829+
pagecache_page pp = page_lookup_nodelocked(pn, pi);
830+
if ((pp != INVALID_ADDRESS) && (page_from_pte(old_entry) == pp->phys)) {
831+
flush_entry fe = bound(fe);
832+
if (pte_clear_accessed(entry)) {
833+
page_invalidate(fe, vaddr);
834+
pagecache_lock_state(pc);
835+
touch_page_locked(pn, pp, 0);
836+
pagecache_unlock_state(pc);
837+
} else if (pp->evicted) {
838+
buffer page_entries = bound(page_entries);
839+
if (buffer_space(page_entries) >= sizeof(struct pagecache_page_entry)) {
840+
page_invalidate(fe, vaddr);
841+
pagecache_page_entry e = buffer_end(page_entries);
842+
e->pte_ptr = entry;
843+
e->pp = pp;
844+
buffer_produce(page_entries, sizeof(*e));
845+
} else {
846+
abort = true;
847+
}
848+
}
849+
}
850+
pagecache_unlock_node(pn);
851+
}
852+
return !abort;
853+
}
854+
855+
static void pagecache_scan_old_maps(list head, flush_entry fe, buffer page_entries)
856+
{
857+
list_foreach(head, l) {
858+
pagecache_map pcm = struct_from_list(l, pagecache_map, l);
859+
if (!traverse_ptes(pcm->n.r.start, range_span(pcm->n.r),
860+
stack_closure(pagecache_check_old_page, pcm, fe, page_entries)))
861+
return;
862+
}
863+
}
864+
865+
closure_func_basic(thunk, void, pagecache_inval_complete)
866+
{
867+
pagecache_drain_work drain_work = struct_from_closure(pagecache_drain_work, inval_complete);
868+
pagecache_page_entry entry;
869+
pagecache pc = global_pagecache;
870+
pagecache_lock_state(pc);
871+
while ((entry = buffer_pop(drain_work->page_entries, sizeof(*entry))) != 0) {
872+
pteptr pte_ptr = entry->pte_ptr;
873+
pagecache_page pp = entry->pp;
874+
pte pt_entry = pte_from_pteptr(pte_ptr);
875+
if (pte_is_present(pt_entry) && (page_from_pte(pt_entry) == pp->phys) &&
876+
!pte_is_dirty(pt_entry) && !pte_is_accessed(pt_entry)) {
877+
pte_set(pte_ptr, 0);
878+
if (pp->refcount == 1)
879+
drain_work->freed_pages++;
880+
pagecache_page_release_locked(pc, pp, true);
881+
}
882+
}
883+
pagecache_unlock_state(pc);
884+
}
885+
886+
u64 pagecache_drain(u64 drain_bytes, u32 flags)
812887
{
813888
pagecache pc = global_pagecache;
814889
u64 pages = pad(drain_bytes, cache_pagesize(pc)) >> pc->page_order;
@@ -825,6 +900,31 @@ u64 pagecache_drain(u64 drain_bytes)
825900
if (drained < drain_bytes)
826901
drained += cache_drain((caching_heap)pc->completions, drain_bytes - drained,
827902
PAGECACHE_COMPLETIONS_RETAIN * sizeof(struct page_completion));
903+
if ((drained < drain_bytes) && (flags & MEMCLEAN_CANWAIT)) {
904+
timestamp here = now(CLOCK_ID_MONOTONIC_RAW);
905+
boolean purge_mappings;
906+
if (flags & MEMCLEAN_OOM)
907+
purge_mappings = true;
908+
else {
909+
purge_mappings = ((here - pc->map_purge) >= seconds(PAGECACHE_SCAN_PERIOD_SECONDS));
910+
}
911+
if (purge_mappings) {
912+
pc->map_purge = here;
913+
struct pagecache_drain_work drain_work;
914+
buffer page_entries = drain_work.page_entries =
915+
little_stack_buffer(context_stack_space() / 2);
916+
flush_entry fe = get_page_flush_entry();
917+
pagecache_lock_mappings();
918+
pagecache_scan_old_maps(&pc->shared_maps, fe, page_entries);
919+
pagecache_scan_old_maps(&pc->private_maps, fe, page_entries);
920+
pagecache_unlock_mappings();
921+
drain_work.freed_pages = 0;
922+
page_invalidate_sync(fe, init_closure_func(&drain_work.inval_complete, thunk,
923+
pagecache_inval_complete),
924+
true);
925+
drained += drain_work.freed_pages * cache_pagesize(pc);
926+
}
927+
}
828928
return drained;
829929
}
830930

@@ -2139,6 +2239,7 @@ void init_pagecache(heap general, heap contiguous, u64 pagesize)
21392239
list_init(&pc->private_maps);
21402240
init_closure_func(&pc->page_compare, rb_key_compare, pagecache_page_compare);
21412241
init_closure_func(&pc->page_print_key, rbnode_handler, pagecache_page_print_key);
2242+
pc->map_purge = 0;
21422243

21432244
pc->writeback_in_progress = false;
21442245
init_timer(&pc->scan_timer);

src/kernel/pagecache.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ int pagecache_get_page_order(void);
2828

2929
u64 pagecache_get_occupancy(void);
3030

31-
u64 pagecache_drain(u64 drain_bytes);
31+
u64 pagecache_drain(u64 drain_bytes, u32 flags);
3232

3333
pagecache_node pagecache_allocate_node(pagecache_volume pv, sg_io fs_read, sg_io fs_write, pagecache_node_reserve fs_reserve);
3434

src/kernel/pagecache_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ typedef struct pagecache {
3434
struct list volumes;
3535
struct list shared_maps;
3636
struct list private_maps;
37+
timestamp map_purge;
3738

3839
boolean writeback_in_progress;
3940
struct timer scan_timer;

src/riscv64/page_machine.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ static inline u64 get_pagetable_base(u64 vaddr)
1616
#define PAGE_EXEC U64_FROM_BIT(3)
1717
#define PAGE_USER U64_FROM_BIT(4)
1818
#define PAGE_GLOBAL U64_FROM_BIT(5)
19+
#define PAGE_ACCESSED U64_FROM_BIT(6)
1920
#define PAGE_DIRTY U64_FROM_BIT(7)
2021
#define PAGE_NO_BLOCK U64_FROM_BIT(8) // RSW[0]
2122
#define PAGE_DEFAULT_PERMISSIONS (PAGE_READABLE)
@@ -213,11 +214,23 @@ static inline boolean pte_is_dirty(pte entry)
213214
return (entry & PAGE_DIRTY) != 0;
214215
}
215216

217+
static inline boolean pte_is_accessed(pte entry)
218+
{
219+
return (entry & PAGE_ACCESSED) != 0;
220+
}
221+
216222
static inline void pt_pte_clean(pteptr pte)
217223
{
218224
*pte &= ~PAGE_DIRTY;
219225
}
220226

227+
static inline boolean pte_clear_accessed(pteptr pp)
228+
{
229+
boolean accessed = !!(*pp & PAGE_ACCESSED);
230+
*pp &= ~PAGE_ACCESSED;
231+
return accessed;
232+
}
233+
221234
static inline u64 page_from_pte(pte pte)
222235
{
223236
return (pte & (MASK(54) & ~PAGE_FLAGS_MASK))<<2;

src/unix/exec.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,20 +278,26 @@ closure_function(4, 5, boolean, faulting_map,
278278
u64 vmflags = VMAP_FLAG_READABLE | VMAP_FLAG_PROG;
279279
if (pageflags_is_exec(flags))
280280
vmflags |= VMAP_FLAG_EXEC;
281-
if (pageflags_is_writable(flags))
281+
boolean rw = pageflags_is_writable(flags);
282+
if (rw)
282283
vmflags |= VMAP_FLAG_WRITABLE;
283284
if (tail_bss > 0)
284285
vmflags |= VMAP_FLAG_TAIL_BSS;
285286
range r = irangel(map_start, data_map_size);
286287
exec_debug("%s: add %s to vmap: %R vmflags 0x%lx, offset 0x%lx, data_size 0x%lx, tail_bss 0x%lx\n",
287288
func_ss, pageflags_is_exec(flags) ? ss("text") : ss("data"),
288289
r, vmflags, offset, data_size, tail_bss);
290+
pagecache_node pn = fsfile_get_cachenode(bound(f));
289291
struct vmap k = ivmap(vmflags, bound(allowed_flags), offset,
290-
fsfile_get_cachenode(bound(f)), 0);
292+
pn, 0);
291293
if (tail_bss > 0)
292294
k.bss_offset = data_size;
293295
if (allocate_vmap(bound(p), r, k) == INVALID_ADDRESS)
294296
goto alloc_fail;
297+
if (!rw)
298+
/* Make the page cache aware of this mapping so that it can evict relevant pages on
299+
* memory pressure. */
300+
pagecache_node_add_mapping(pn, r, offset, false);
295301
map_start += data_map_size;
296302
bss_size -= tail_bss;
297303
}

src/x86_64/page_machine.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,11 @@ static inline boolean pte_is_dirty(pte entry)
223223
return (entry & PAGE_DIRTY) != 0;
224224
}
225225

226+
static inline boolean pte_is_accessed(pte entry)
227+
{
228+
return (entry & PAGE_ACCESSED) != 0;
229+
}
230+
226231
static inline u64 page_from_pte(pte p)
227232
{
228233
/* page directory pointer base address [51:12] */
@@ -234,6 +239,13 @@ static inline void pt_pte_clean(pteptr pp)
234239
*pp &= ~PAGE_DIRTY;
235240
}
236241

242+
static inline boolean pte_clear_accessed(pteptr pp)
243+
{
244+
boolean accessed = !!(*pp & PAGE_ACCESSED);
245+
*pp &= ~PAGE_ACCESSED;
246+
return accessed;
247+
}
248+
237249
#ifndef physical_from_virtual
238250
static inline u64 pte_lookup_phys(u64 table, u64 vaddr, int offset)
239251
{

test/e2e/ruby_alloc/config.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,6 @@
77
"Kernel": "../../../output/test/e2e/kernel.img",
88
"ManifestPassthrough": {
99
"debug_exit":"t",
10-
"expected_exit_code":["9", "11"]
10+
"expected_exit_code":["6", "9", "11"]
1111
}
1212
}

0 commit comments

Comments
 (0)