
Commit 1462526

Physical heap: switch from id heap to buddy memory allocator
This change introduces a new heap type that implements a buddy memory allocator and uses it as the physical memory heap. This brings the following improvements:

- Lower memory fragmentation: the buddy allocator keeps track of free contiguous memory areas of each power-of-2 size and, when allocating a requested size, uses a minimum-sized free area to satisfy the request (a minimal free-list sketch follows this message).
- Higher memory re-use (which maximizes the TLB hit rate and minimizes host memory usage): when multiple minimum-sized memory areas are available to satisfy an allocation request, the buddy allocator selects the most recently used area.
- Higher scalability: the buddy allocator does not rely on linear searches over a given memory range to select a free area; therefore, the average CPU usage of allocation requests does not increase with the size of physical memory.

The typical overhead of the buddy allocator is around 0.1%, i.e. 99.9% of the physical memory available to a VM can be used for kernel and user program allocations.

The size of the bootstrap heap no longer depends on the size of physical memory; therefore, the memory size is no longer calculated when it is not needed, and the bootstrap heap is set up in common code instead of platform-specific code.

The PAGEHEAP_LOWMEM_PAGESIZE constant now sets the upper limit for physical memory allocation requests on low-memory guests, and has been raised to 1 MB so that the async queue used by the scheduler can be allocated.

The `pages` kernel heap is now a simple wrapper around the page-backed heap (whose purpose is to avoid complete exhaustion of physical memory), and is used by both the pagecache and the mmap code.
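For readers unfamiliar with the technique, below is a minimal, self-contained buddy-allocator sketch in C. It is illustrative only: the names (`buddy_init`, `buddy_alloc`, `buddy_free`) and the fixed MIN_ORDER/MAX_ORDER bounds are assumptions for this sketch, not code from this commit.

```c
/* Minimal buddy allocator sketch (illustrative, not the commit's code).
 * Free blocks of each power-of-2 order sit on per-order lists; allocation
 * pops a minimum-sized block, splitting larger blocks as needed; freeing
 * merges a block with its free "buddy" to rebuild larger blocks. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MIN_ORDER 12                    /* smallest block: 4 KB */
#define MAX_ORDER 20                    /* pool size: 1 MB */
#define NUM_ORDERS (MAX_ORDER - MIN_ORDER + 1)

struct block {                          /* header stored inside free blocks */
    struct block *next;
};

/* LIFO lists: the most recently freed block is handed out first (MRU) */
static struct block *free_list[NUM_ORDERS];
static uint8_t *pool_base;

static void push(struct block *b, int order)
{
    b->next = free_list[order - MIN_ORDER];
    free_list[order - MIN_ORDER] = b;
}

void buddy_init(void *mem)              /* mem must be 2^MAX_ORDER bytes */
{
    pool_base = mem;
    memset(free_list, 0, sizeof(free_list));
    push(mem, MAX_ORDER);
}

static int order_for(size_t size)
{
    int order = MIN_ORDER;
    while (order <= MAX_ORDER && ((size_t)1 << order) < size)
        order++;
    return order;
}

void *buddy_alloc(size_t size)
{
    int order = order_for(size), o = order;
    while (o <= MAX_ORDER && !free_list[o - MIN_ORDER])
        o++;                            /* smallest order with a free block */
    if (o > MAX_ORDER)
        return 0;                       /* out of memory (or size too big) */
    struct block *b = free_list[o - MIN_ORDER];
    free_list[o - MIN_ORDER] = b->next;
    while (o > order) {                 /* split: push back upper halves */
        o--;
        push((struct block *)((uint8_t *)b + ((size_t)1 << o)), o);
    }
    return b;
}

void buddy_free(void *p, size_t size)
{
    int order = order_for(size);
    uintptr_t off = (uint8_t *)p - pool_base;
    while (order < MAX_ORDER) {         /* coalesce while the buddy is free */
        uintptr_t buddy = off ^ ((uintptr_t)1 << order);
        struct block **cur = &free_list[order - MIN_ORDER];
        while (*cur && (uint8_t *)*cur != pool_base + buddy)
            cur = &(*cur)->next;
        if (!*cur)
            break;                      /* buddy still allocated: stop */
        *cur = (*cur)->next;            /* unlink buddy, merge downward */
        off &= ~((uintptr_t)1 << order);
        order++;
    }
    push((struct block *)(pool_base + off), order);
}

int main(void)
{
    static uint64_t pool[(1u << MAX_ORDER) / sizeof(uint64_t)];
    buddy_init(pool);
    void *a = buddy_alloc(4096), *b = buddy_alloc(64 * 1024);
    buddy_free(a, 4096);
    void *c = buddy_alloc(4096);        /* a's area, coalesced and re-split */
    printf("a=%p b=%p c=%p (a == c: %d)\n", a, b, c, a == c);
    return 0;
}
```

Note that allocation and freeing only manipulate per-order free lists rather than scanning address ranges, which is the scalability property claimed above. Allocating 4 KB from the fresh 1 MB pool splits the top-order block eight times, leaving one free block on each of the orders 12 through 19.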
1 parent e9e345f

File tree

21 files changed: +849 −134 lines changed

platform/pc/service.c

Lines changed: 7 additions & 17 deletions
```diff
@@ -121,7 +121,7 @@ void reclaim_regions(void)
     for_regions(e) {
         if (e->type == REGION_RECLAIM) {
             unmap(e->base, e->length);
-            if (!id_heap_add_range(heap_physical(get_kernel_heaps()), e->base, e->length))
+            if (!pageheap_add_range(e->base, e->length))
                 halt("%s: add range for physical heap failed (%R)\n",
                      func_ss, irange(e->base, e->base + e->length));
         }
@@ -262,9 +262,9 @@ static void find_initial_pages(void)
         halt("no initial pages region found; halt\n");
 }
 
-id_heap init_physical_id_heap(heap h)
+void init_physical_heap(void)
 {
-    u64 phys_length = 0;
+    /* Carve the bootstrap heap out of a physical memory region. */
     for_regions(e) {
         if (e->type == REGION_PHYSICAL) {
             /* Remove low memory area from physical memory regions, so that it can be used for
@@ -279,27 +279,18 @@ id_heap init_physical_id_heap(heap h)
             }
         }
 
-            phys_length += e->length;
-        }
-    }
-    u64 bootstrap_size = init_bootstrap_heap(phys_length);
-
-    /* Carve the bootstrap heap out of a physical memory region. */
-    for_regions(e) {
-        if (e->type == REGION_PHYSICAL) {
             u64 base = pad(e->base, PAGESIZE);
             u64 end = e->base + e->length;
             u64 length = (end & ~MASK(PAGELOG)) - base;
-            if (length >= bootstrap_size) {
-                map(BOOTSTRAP_BASE, base, bootstrap_size, pageflags_writable(pageflags_memory()));
-                e->base = base + bootstrap_size;
+            if (length >= BOOTSTRAP_SIZE) {
+                map(BOOTSTRAP_BASE, base, BOOTSTRAP_SIZE, pageflags_writable(pageflags_memory()));
+                e->base = base + BOOTSTRAP_SIZE;
                 e->length = end - e->base;
                 break;
             }
         }
     }
 
-    id_heap physical = allocate_id_heap(h, h, PAGESIZE, true);
     boolean found = false;
     early_init_debug("physical memory:");
     for_regions(e) {
@@ -315,15 +306,14 @@ id_heap init_physical_id_heap(heap h)
             early_debug_u64(base + length);
             early_debug(")\n");
 #endif
-            if (!id_heap_add_range(physical, base, length))
+            if (!pageheap_add_range(base, length))
                 halt(" - id_heap_add_range failed\n");
             found = true;
         }
     }
     if (!found) {
         halt("no valid physical regions found; halt\n");
     }
-    return physical;
 }
 
 static void setup_initmap(void)
```
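A worked example of the carve logic above, assuming 4 KB pages (so BOOTSTRAP_SIZE = 8 * PAGESIZE = 32 KB, per the kernel.h change below): a physical region starting at 0x100500 yields base = pad(0x100500, PAGESIZE) = 0x101000; if the page-aligned span from there to the region's end is at least 32 KB, BOOTSTRAP_BASE is mapped to the 32 KB at 0x101000 and the region's base is advanced to 0x109000, so the bootstrap pages are never handed to the physical heap.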

platform/riscv-virt/service.c

Lines changed: 6 additions & 9 deletions
```diff
@@ -25,9 +25,9 @@ u64 machine_random_seed(void)
 
 extern void *START, *END;
 
-id_heap init_physical_id_heap(heap h)
+void init_physical_heap(void)
 {
-    init_debug("init_physical_id_heap\n");
+    init_debug("init_physical_heap\n");
     u64 kernel_size = pad(u64_from_pointer(&END) -
                           u64_from_pointer(&START), PAGESIZE);
 
@@ -45,20 +45,17 @@ id_heap init_physical_id_heap(heap h)
 
     u64 base = KERNEL_PHYS + kernel_size;
     u64 end = PHYSMEM_BASE + mem_size;
-    u64 bootstrap_size = init_bootstrap_heap(end - base);
-    map(BOOTSTRAP_BASE, base, bootstrap_size, pageflags_writable(pageflags_memory()));
-    base += bootstrap_size;
+    map(BOOTSTRAP_BASE, base, BOOTSTRAP_SIZE, pageflags_writable(pageflags_memory()));
+    base += BOOTSTRAP_SIZE;
     init_debug("\nfree base ");
     init_debug_u64(base);
     init_debug("\nend ");
     init_debug_u64(end);
    init_debug("\n");
-    id_heap physical = allocate_id_heap(h, h, PAGESIZE, true);
-    if (!id_heap_add_range(physical, base, end - base)) {
-        halt("init_physical_id_heap: failed to add range %R\n",
+    if (!pageheap_add_range(base, end - base)) {
+        halt("init_physical_heap: failed to add range %R\n",
              irange(base, end));
     }
-    return physical;
 }
 
 range kern_get_elf(void)
```

platform/virt/service.c

Lines changed: 26 additions & 48 deletions
```diff
@@ -69,33 +69,24 @@ static void uefi_mem_map_iterate(uefi_mem_map mem_map, range_handler h)
     }
 }
 
-closure_function(1, 1, boolean, get_mem_size,
-                 u64 *, mem_size,
+closure_function(2, 1, boolean, get_bootstrap_base,
+                 range, rsvd, u64 *, base,
                  range r)
 {
-    *bound(mem_size) += range_span(r);
-    return true;
-}
-
-closure_function(3, 1, boolean, get_bootstrap_base,
-                 range, rsvd, u64, bootstrap_size, u64 *, base,
-                 range r)
-{
-    u64 bootstrap_size = bound(bootstrap_size);
     range r1, r2;
     range_difference(r, bound(rsvd), &r1, &r2);
-    if (range_span(r1) >= bootstrap_size) {
+    if (range_span(r1) >= BOOTSTRAP_SIZE) {
         *bound(base) = r1.start;
         return false;
     }
-    if (range_span(r2) >= bootstrap_size) {
+    if (range_span(r2) >= BOOTSTRAP_SIZE) {
         *bound(base) = r2.start;
         return false;
     }
     return true;
 }
 
-static void add_heap_range_internal(id_heap h, range r, range *remainder)
+static void add_heap_range_internal(range r, range *remainder)
 {
     if (remainder) {
         if (range_empty(*remainder)) {
@@ -121,31 +112,30 @@ static void add_heap_range_internal(id_heap h, range r, range *remainder)
     init_debug(" 0x");
     init_debug_u64(r.end);
     init_debug(")\n");
-    id_heap_add_range(h, r.start, range_span(r));
+    pageheap_add_range(r.start, range_span(r));
 }
 
-static inline void add_heap_range_helper(id_heap h, range r, range rsvd, range *remainder)
+static inline void add_heap_range_helper(range r, range rsvd, range *remainder)
 {
     if (!range_empty(r)) {
         range r1, r2;
         range_difference(r, rsvd, &r1, &r2);
         if (!range_empty(r1))
-            add_heap_range_internal(h, r1, remainder);
+            add_heap_range_internal(r1, remainder);
         if (!range_empty(r2))
-            add_heap_range_internal(h, r2, remainder);
+            add_heap_range_internal(r2, remainder);
     }
 }
 
-closure_function(4, 1, boolean, add_heap_range,
-                 id_heap, h, range, rsvd1, range, rsvd2, range *, remainder,
+closure_function(3, 1, boolean, add_heap_range,
+                 range, rsvd1, range, rsvd2, range *, remainder,
                  range r)
 {
-    id_heap h = bound(h);
     range *remainder = bound(remainder);
     range r1, r2;
     range_difference(r, bound(rsvd1), &r1, &r2);
-    add_heap_range_helper(h, r1, bound(rsvd2), remainder);
-    add_heap_range_helper(h, r2, bound(rsvd2), remainder);
+    add_heap_range_helper(r1, bound(rsvd2), remainder);
+    add_heap_range_helper(r2, bound(rsvd2), remainder);
     return true;
 }
 
@@ -158,63 +148,51 @@ static u64 get_memory_size(void *dtb)
 }
 
 extern void *START, *END;
-id_heap init_physical_id_heap(heap h)
+void init_physical_heap(void)
 {
-    init_debug("init_physical_id_heap\n");
+    init_debug("init_physical_heap\n");
     u64 kernel_size = pad(u64_from_pointer(&END) -
                           u64_from_pointer(&START), PAGESIZE);
 
     init_debug("init_setup_stack: kernel size ");
     init_debug_u64(kernel_size);
 
-    id_heap physical;
     if (boot_params.mem_map.map) {
         u64 map_base = u64_from_pointer(boot_params.mem_map.map);
         u64 map_size = pad((map_base & PAGEMASK) + boot_params.mem_map.map_size, PAGESIZE);
         map_base &= ~PAGEMASK;
         /* map_base has been identity-mapped in ueft_rt_init_virt() */
-        u64 mem_size = 0;
-        uefi_mem_map_iterate(&boot_params.mem_map, stack_closure(get_mem_size, &mem_size));
-        init_debug("\nmem size ");
-        init_debug_u64(mem_size);
-        u64 bootstrap_size = init_bootstrap_heap(mem_size);
         range reserved = irange(DEVICETREE_BLOB_BASE + kernel_phys_offset,
                                 KERNEL_PHYS + kernel_size + kernel_phys_offset);
         u64 base = 0;
         uefi_mem_map_iterate(&boot_params.mem_map,
-                             stack_closure(get_bootstrap_base, reserved, bootstrap_size, &base));
+                             stack_closure(get_bootstrap_base, reserved, &base));
         init_debug("\nbootstrap base ");
         init_debug_u64(base);
-        init_debug(", size ");
-        init_debug_u64(bootstrap_size);
         init_debug("\n");
         assert(!(base & PAGEMASK));
-        map(BOOTSTRAP_BASE, base, bootstrap_size, pageflags_writable(pageflags_memory()));
-        physical = allocate_id_heap(h, h, PAGESIZE, true);
+        map(BOOTSTRAP_BASE, base, BOOTSTRAP_SIZE, pageflags_writable(pageflags_memory()));
        range remainder = irange(0, 0);
-        uefi_mem_map_iterate(&boot_params.mem_map, stack_closure(add_heap_range, physical, reserved,
-                                                                 irangel(base, bootstrap_size),
+        uefi_mem_map_iterate(&boot_params.mem_map, stack_closure(add_heap_range, reserved,
+                                                                 irangel(base, BOOTSTRAP_SIZE),
                                                                  &remainder));
-        add_heap_range_internal(physical, remainder, 0);
+        add_heap_range_internal(remainder, 0);
         unmap(map_base, map_size);
     } else {
         u64 base = KERNEL_PHYS + kernel_size;
         u64 end = PHYSMEM_BASE + get_memory_size(pointer_from_u64(DEVICETREE_BLOB_BASE));
-        u64 bootstrap_size = init_bootstrap_heap(end - base);
-        map(BOOTSTRAP_BASE, base, bootstrap_size, pageflags_writable(pageflags_memory()));
-        base += bootstrap_size;
+        map(BOOTSTRAP_BASE, base, BOOTSTRAP_SIZE, pageflags_writable(pageflags_memory()));
+        base += BOOTSTRAP_SIZE;
         init_debug("\nfree base ");
         init_debug_u64(base);
         init_debug("\nend ");
         init_debug_u64(end);
         init_debug("\n");
-        physical = allocate_id_heap(h, h, PAGESIZE, true);
-        if (!id_heap_add_range(physical, base, end - base)) {
-            halt("init_physical_id_heap: failed to add range %R\n",
+        if (!pageheap_add_range(base, end - base)) {
+            halt("init_physical_heap: failed to add range %R\n",
                  irange(base, end));
         }
     }
-    return physical;
 }
 
 range kern_get_elf(void)
@@ -255,7 +233,7 @@ static void ueft_rt_init_virt(void)
     u64 map_size = pad((map_base & PAGEMASK) + mem_map->map_size, PAGESIZE);
     map_base &= ~PAGEMASK;
     pageflags flags = pageflags_writable(pageflags_memory());
-    map(map_base, map_base, map_size, flags); /* will be unmapped in init_physical_id_heap() */
+    map(map_base, map_base, map_size, flags); /* will be unmapped in init_physical_heap() */
     int num_desc = mem_map->map_size / mem_map->desc_size;
     u64 rt_svc_offset = 0;
     for (int i = 0; i < num_desc; i++) {
@@ -490,7 +468,7 @@ void init_platform_devices(kernel_heaps kh)
     vector cpu_ids = cpus_init_ids(heap_general(kh));
     platform_dtb_parse(kh, cpu_ids);
     /* the device tree blob is never accessed from now on: reclaim the memory where it is located */
-    id_heap_add_range(heap_physical(kh), DEVICETREE_BLOB_BASE + kernel_phys_offset,
+    pageheap_add_range(DEVICETREE_BLOB_BASE + kernel_phys_offset,
                       INIT_PAGEMEM - DEVICETREE_BLOB_BASE);
     struct console_driver *console_driver = 0;
     init_acpi_tables(kh);
```
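The add_heap_range closures above feed every usable memory range to pageheap_add_range() after punching out two reserved ranges (kernel/DTB and bootstrap). As a self-contained illustration of that range splitting (the struct and helpers here are hypothetical stand-ins, not Nanos's actual range/range_difference implementation):

```c
/* Removing a reserved subrange from a memory range leaves up to two pieces;
 * each non-empty piece is then added to the page heap. */
#include <stdint.h>
#include <stdio.h>

struct range { uint64_t start, end; };  /* half-open [start, end) */

static struct range intersect(struct range a, struct range b)
{
    struct range r = { a.start > b.start ? a.start : b.start,
                       a.end < b.end ? a.end : b.end };
    if (r.start >= r.end)
        r.start = r.end = 0;            /* empty */
    return r;
}

/* r minus rsvd: r1 is the piece below the reservation, r2 the piece above */
static void range_difference(struct range r, struct range rsvd,
                             struct range *r1, struct range *r2)
{
    struct range i = intersect(r, rsvd);
    if (i.start == i.end) {             /* no overlap: r survives intact */
        *r1 = r;
        r2->start = r2->end = 0;
        return;
    }
    r1->start = r.start; r1->end = i.start;
    r2->start = i.end;   r2->end = r.end;
    if (r1->start >= r1->end) r1->start = r1->end = 0;
    if (r2->start >= r2->end) r2->start = r2->end = 0;
}

int main(void)
{
    /* a 16 MB region with a 2 MB reservation in the middle */
    struct range r = { 0x1000000, 0x2000000 }, rsvd = { 0x1800000, 0x1a00000 };
    struct range lo, hi;
    range_difference(r, rsvd, &lo, &hi);
    printf("[0x%llx,0x%llx) and [0x%llx,0x%llx)\n",
           (unsigned long long)lo.start, (unsigned long long)lo.end,
           (unsigned long long)hi.start, (unsigned long long)hi.end);
    return 0;
}
```

Running this prints [0x1000000,0x1800000) and [0x1a00000,0x2000000); add_heap_range_helper does the same with r1 and r2, adding each non-empty piece to the page heap.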

src/config.h

Lines changed: 1 addition & 1 deletion
```diff
@@ -67,7 +67,7 @@
 #define PAGECACHE_SCAN_PERIOD_SECONDS 5
 #define PAGEHEAP_MEMORY_RESERVE (8 * MB)
 #define PAGEHEAP_LOWMEM_MEMORY_RESERVE (4 * MB)
-#define PAGEHEAP_LOWMEM_PAGESIZE (128*KB)
+#define PAGEHEAP_LOWMEM_PAGESIZE (1 * MB)
 #define LOW_MEMORY_THRESHOLD (64 * MB)
 #define SG_FRAG_BYTE_THRESHOLD (128*KB)
 #define PAGECACHE_MAX_SG_ENTRIES 8192
```

src/kernel/init.c

Lines changed: 10 additions & 28 deletions
```diff
@@ -56,21 +56,6 @@ static u64 bootstrap_alloc(heap h, bytes length)
 BSS_RO_AFTER_INIT static struct kernel_heaps heaps;
 BSS_RO_AFTER_INIT static vector shutdown_completions;
 
-u64 init_bootstrap_heap(u64 phys_length)
-{
-    u64 page_count = phys_length >> PAGELOG;
-
-    /* In theory, when initializing the physical heap, the bootstrap heap must accommodate 1 bit per
-     * physical memory page (as needed by the id heap bitmap); but due to the way buffer extension
-     * works, when an id heap is pre-allocated, its bitmap allocates twice the amount of memory
-     * needed; thus, the bootstrap heap needs twice the theoretical amount of memory.
-     * In addition, we need some extra space for various initial allocations. */
-    u64 bootstrap_size = 8 * PAGESIZE + pad(page_count >> 2, PAGESIZE);
-
-    bootstrap_limit = BOOTSTRAP_BASE + bootstrap_size;
-    return bootstrap_size;
-}
-
 /* Kernel address space layout randomization.
  * Functions that call (directly or indirectly) this function must either not return to their
  * caller, or execute `return_offset(kas_kern_offset - kernel_phys_offset)` before returning to
@@ -115,9 +100,10 @@ void init_kernel_heaps(void)
     BSS_RO_AFTER_INIT static struct heap bootstrap;
     bootstrap.alloc = bootstrap_alloc;
     bootstrap.dealloc = leak;
+    bootstrap_limit = BOOTSTRAP_BASE + BOOTSTRAP_SIZE;
 
-    heaps.physical = init_physical_id_heap(&bootstrap);
-    assert(heaps.physical != INVALID_ADDRESS);
+    heaps.physical = pageheap_init(&bootstrap);
+    init_physical_heap();
 
     heaps.linear_backed = allocate_linear_backed_heap(&bootstrap, heaps.physical, irange(0, 0));
 #if defined(MEMDEBUG_BACKED) || defined(MEMDEBUG_ALL)
@@ -138,18 +124,19 @@ void init_kernel_heaps(void)
     heaps.virtual_huge = create_id_heap(&bootstrap, &bootstrap, kmem_base,
                                         KMEM_LIMIT - kmem_base, HUGE_PAGESIZE, true);
 
-    /* Pre-allocate all memory that might be needed for the physical and virtual huge heaps, so that
+    /* Pre-allocate all memory that might be needed for the virtual huge heap, so that
      * during runtime all allocations on the bootstrap heap come from a single source protected by a
      * lock (i.e. the virtual page heap). */
-    id_heap_prealloc(heaps.physical);
     id_heap_prealloc(heaps.virtual_huge);
 
     heaps.virtual_page = create_id_heap_backed(&bootstrap, &bootstrap,
                                                (heap)heaps.virtual_huge, PAGESIZE, true);
+    u64 virt_base;
     boolean kernmem_equals_dmamem = (pageflags_kernel_data().w == pageflags_dma().w);
     if (kernmem_equals_dmamem) {
         heaps.page_backed = heaps.linear_backed;
         init_page_tables((heap)heaps.physical);
+        virt_base = LINEAR_BACKED_BASE;
     } else {
         /* The linear_backed heap cannot be used for non-DMA kernel data, thus we need another
          * linear mapping for the page tables: do this mapping, then use it to create the
@@ -159,15 +146,14 @@ void init_kernel_heaps(void)
 #if defined(MEMDEBUG_BACKED) || defined(MEMDEBUG_ALL)
         heaps.page_backed = mem_debug_backed(&bootstrap, heaps.page_backed, PAGESIZE_2M, true);
 #endif
+        virt_base = mapped_virt.start;
     }
 
     boolean is_lowmem = is_low_memory_machine();
+    pageheap_init_done(pointer_from_u64(virt_base),
+                       is_lowmem ? PAGEHEAP_LOWMEM_PAGESIZE : PAGESIZE_2M);
     u64 memory_reserve = is_lowmem ? PAGEHEAP_LOWMEM_MEMORY_RESERVE : PAGEHEAP_MEMORY_RESERVE;
-    heaps.pages = allocate_objcache(&bootstrap,
-                                    reserve_heap_wrapper(&bootstrap, (heap)heaps.page_backed,
-                                                         memory_reserve),
-                                    PAGESIZE, is_lowmem ? PAGEHEAP_LOWMEM_PAGESIZE : PAGESIZE_2M,
-                                    true);
+    heaps.pages = reserve_heap_wrapper(&bootstrap, (heap)heaps.page_backed, memory_reserve);
     int max_mcache_order = is_lowmem ? MAX_LOWMEM_MCACHE_ORDER : MAX_MCACHE_ORDER;
     bytes pagesize = is_lowmem ? U64_FROM_BIT(max_mcache_order + 1) : PAGESIZE_2M;
     heaps.general = allocate_mcache(&bootstrap, (heap)heaps.page_backed, 5, max_mcache_order,
@@ -441,10 +427,6 @@ static u64 mm_clean(u64 clean_bytes)
     }
     spin_unlock(&mm_lock);
     u64 cleaned = clean_bytes - remain;
-    if (cleaned)
-        /* Memory cleaners may have deallocated page heap memory: drain the page heap, so that
-         * deallocated memory can be returned to the physical heap. */
-        cache_drain(init_heaps->pages, cleaned, 0);
     return cleaned;
 }
```
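With the objcache gone, heaps.pages is now just reserve_heap_wrapper() over the page-backed heap, as described in the commit message. A self-contained sketch of the reserve-wrapper idea (hypothetical stand-alone code; the kernel's reserve_heap_wrapper takes the meta heap, parent heap, and reserve shown above, but its internals are not reproduced here):

```c
/* Illustration of the reserve-wrapper idea behind heaps.pages: forward
 * allocations to a parent heap, but fail any request that would leave
 * less than a fixed reserve available. */
#include <stdio.h>
#include <stdlib.h>

struct wrapped_heap {
    size_t total;       /* parent heap capacity in bytes */
    size_t used;        /* bytes currently handed out */
    size_t reserve;     /* bytes that must stay available */
};

static void *reserve_alloc(struct wrapped_heap *h, size_t len)
{
    if (h->used + len + h->reserve > h->total)
        return NULL;    /* would dip into the reserve: refuse */
    h->used += len;
    return malloc(len); /* stand-in for the parent (page-backed) heap */
}

static void reserve_free(struct wrapped_heap *h, void *p, size_t len)
{
    free(p);
    h->used -= len;
}

int main(void)
{
    /* mirror PAGEHEAP_LOWMEM_MEMORY_RESERVE = 4 MB on a 64 MB "heap" */
    struct wrapped_heap pages = { 64u << 20, 0, 4u << 20 };
    void *a = reserve_alloc(&pages, 32u << 20);
    void *b = reserve_alloc(&pages, 30u << 20); /* fails: only 28 MB usable */
    printf("a=%p b=%p\n", a, b);
    reserve_free(&pages, a, 32u << 20);
    return 0;
}
```

Refusing any request that would dip into the reserve is what lets the pagecache and mmap share heaps.pages without ever driving physical memory to complete exhaustion.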

src/kernel/kernel.h

Lines changed: 3 additions & 3 deletions
```diff
@@ -597,10 +597,10 @@ static inline boolean sched_queue_empty(sched_queue sq)
 #define BREAKPOINT_READ_WRITE 11
 
 #define BOOTSTRAP_BASE KMEM_BASE
+#define BOOTSTRAP_SIZE (8 * PAGESIZE)
 
 void kaslr(void);
-u64 init_bootstrap_heap(u64 phys_length);
-id_heap init_physical_id_heap(heap h);
+void init_physical_heap(void);
 void init_kernel_heaps(void);
 void init_platform_devices(kernel_heaps kh);
 void init_cpuinfo_machine(cpuinfo ci, heap backed);
@@ -635,7 +635,7 @@ backed_heap allocate_page_backed_heap(heap meta, heap virtual, heap physical,
                                       u64 pagesize, boolean locking);
 void page_backed_dealloc_virtual(backed_heap bh, u64 x, bytes length);
 
-backed_heap allocate_linear_backed_heap(heap meta, id_heap physical, range mapped_virt);
+backed_heap allocate_linear_backed_heap(heap meta, heap physical, range mapped_virt);
 
 static inline boolean is_linear_backed_address(u64 address)
 {
```
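For scale (assuming PAGESIZE is 4 KB): the new fixed bootstrap heap is 8 * 4 KB = 32 KB. Under the removed init_bootstrap_heap() formula, a guest with 4 GB of RAM had page_count = 2^20, giving 8 * 4 KB + pad(2^20 >> 2, 4 KB) = 32 KB + 256 KB = 288 KB, growing linearly with guest memory. The constant size presumably suffices because the buddy allocator's metadata (the ~0.1% overhead quoted in the commit message) comes out of physical memory itself rather than out of the bootstrap heap.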
