VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c@ 90577

Last change on this file since 90577 was 90577, checked in by vboxsync, 4 years ago

Linux Host and Guest drivers: another attempt to introduce initial support for RHEL 8.5 kernels, bugref:4567.

CentOS kernel 4.18.0-326.el8 (RHEL 8.5) has backported commits from vanilla kernel 5.10+.
Fedora kernel 5.9.0-36.eln104 (RHEL 8.99) does not have some of these changes yet.
This commit attempts to move the relevant code into an RHEL 8.5 specific section. Some
of the DRM changes affect only RHEL 8.5 (8.99 is excluded) because the corresponding
commits from newer kernels are missing on 8.99.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 14.9 KB
/* $Id: alloc-r0drv-linux.c 90577 2021-08-09 09:57:00Z vboxsync $ */
/** @file
 * IPRT - Memory Allocation, Ring-0 Driver, Linux.
 */

/*
 * Copyright (C) 2006-2020 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * The contents of this file may alternatively be used under the terms
 * of the Common Development and Distribution License Version 1.0
 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
 * VirtualBox OSE distribution, in which case the provisions of the
 * CDDL are applicable instead of those of the GPL.
 *
 * You may elect to license modified versions of this file under the
 * terms and conditions of either the GPL or the CDDL or both.
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#include "the-linux-kernel.h"
#include "internal/iprt.h"
#include <iprt/mem.h>

#include <iprt/assert.h>
#include <iprt/errcore.h>
#include "r0drv/alloc-r0drv.h"

#if (defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)) && !defined(RTMEMALLOC_EXEC_HEAP)
# if RTLNX_VER_MIN(2,6,23) && RTLNX_VER_MAX(5,8,0) && !RTLNX_RHEL_MAJ_PREREQ(8,5)
/**
 * Starting with 2.6.23 we can use __get_vm_area and map_vm_area to allocate
 * memory in the module range. This is preferable to the exec heap below.
 */
#  define RTMEMALLOC_EXEC_VM_AREA
# else
/**
 * We need memory in the module range (~2GB to ~0); this can only be obtained
 * through APIs that are not exported (see module_alloc()).
 *
 * So, we'll have to create a quick and dirty heap here using BSS memory.
 * Very annoying, and it's going to restrict us!
 */
#  define RTMEMALLOC_EXEC_HEAP
# endif
#endif
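
/*
 * Summary (added for illustration; not part of the original file) of what the
 * selection above boils down to:
 *   - AMD64, kernel 2.6.23 up to 5.8, and not RHEL 8.5: RTMEMALLOC_EXEC_VM_AREA
 *   - AMD64, any other kernel:                          RTMEMALLOC_EXEC_HEAP
 *   - all other architectures:                          neither; the plain
 *     vmalloc()-based paths in rtR0MemAllocEx() below are used instead.
 */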

#ifdef RTMEMALLOC_EXEC_HEAP
# include <iprt/heap.h>
# include <iprt/spinlock.h>
# include <iprt/errcore.h>
#endif

#include "internal/initterm.h"


/*********************************************************************************************************************************
*   Structures and Typedefs                                                                                                      *
*********************************************************************************************************************************/
#ifdef RTMEMALLOC_EXEC_VM_AREA
/**
 * Extended header used for headers marked with RTMEMHDR_FLAG_EXEC_VM_AREA.
 *
 * This is used with allocating executable memory, for things like generated
 * code and loaded modules.
 */
typedef struct RTMEMLNXHDREX
{
    /** The VM area for this allocation. */
    struct vm_struct   *pVmArea;
    void               *pvDummy;
    /** The header we present to the generic API. */
    RTMEMHDR            Hdr;
} RTMEMLNXHDREX;
AssertCompileSize(RTMEMLNXHDREX, 32);
/** Pointer to an extended memory header. */
typedef RTMEMLNXHDREX *PRTMEMLNXHDREX;
#endif


/*********************************************************************************************************************************
*   Global Variables                                                                                                             *
*********************************************************************************************************************************/
#ifdef RTMEMALLOC_EXEC_HEAP
/** The heap. */
static RTHEAPSIMPLE g_HeapExec = NIL_RTHEAPSIMPLE;
/** Spinlock protecting the heap. */
static RTSPINLOCK   g_HeapExecSpinlock = NIL_RTSPINLOCK;
#endif


/**
 * API for cleaning up the heap spinlock on IPRT termination.
 * Like RTR0MemExecDonate, this is specific to AMD64 Linux/GNU.
 */
DECLHIDDEN(void) rtR0MemExecCleanup(void)
{
#ifdef RTMEMALLOC_EXEC_HEAP
    RTSpinlockDestroy(g_HeapExecSpinlock);
    g_HeapExecSpinlock = NIL_RTSPINLOCK;
#endif
}


/**
 * Donate read+write+execute memory to the exec heap.
 *
 * This API is specific to AMD64 and Linux/GNU. A kernel module that desires to
 * use RTMemExecAlloc on AMD64 Linux/GNU will have to donate some statically
 * allocated memory in the module if it wishes for GCC generated code to work.
 * GCC can currently only generate modules that work in the address range
 * ~2GB to ~0.
 *
 * The API accepts only a single donation.
 *
 * @returns IPRT status code.
 * @retval  VERR_NOT_SUPPORTED if the code isn't enabled.
 * @param   pvMemory    Pointer to the memory block.
 * @param   cb          The size of the memory block.
 */
RTR0DECL(int) RTR0MemExecDonate(void *pvMemory, size_t cb)
{
#ifdef RTMEMALLOC_EXEC_HEAP
    int rc;
    AssertReturn(g_HeapExec == NIL_RTHEAPSIMPLE, VERR_WRONG_ORDER);

    rc = RTSpinlockCreate(&g_HeapExecSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "RTR0MemExecDonate");
    if (RT_SUCCESS(rc))
    {
        rc = RTHeapSimpleInit(&g_HeapExec, pvMemory, cb);
        if (RT_FAILURE(rc))
            rtR0MemExecCleanup();
    }
    return rc;
#else
    RT_NOREF_PV(pvMemory); RT_NOREF_PV(cb);
    return VERR_NOT_SUPPORTED;
#endif
}
RT_EXPORT_SYMBOL(RTR0MemExecDonate);


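/*
 * Usage sketch (added for illustration; not part of the original file): a
 * kernel module donates a static BSS buffer once at init time so that later
 * RTMemExecAlloc() calls can be served from the exec heap.  The buffer name
 * and size are made up for the example.
 */
#if 0 /* example only */
static uint8_t g_abExecMemory[_1M];  /* hypothetical donation buffer */

static int rtR0MemExecDonateExample(void)
{
    /* Only one donation is accepted; a second call fails with VERR_WRONG_ORDER. */
    int rc = RTR0MemExecDonate(&g_abExecMemory[0], sizeof(g_abExecMemory));
    if (RT_FAILURE(rc) && rc != VERR_NOT_SUPPORTED)
        return rc;
    return VINF_SUCCESS;
}
#endif

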
#ifdef RTMEMALLOC_EXEC_VM_AREA
/**
 * Allocate executable kernel memory in the module range.
 *
 * @returns Pointer to an allocation header on success, NULL on failure.
 *
 * @param   cb      The size the user requested.
 */
static PRTMEMHDR rtR0MemAllocExecVmArea(size_t cb)
{
    size_t const        cbAlloc = RT_ALIGN_Z(sizeof(RTMEMLNXHDREX) + cb, PAGE_SIZE);
    size_t const        cPages  = cbAlloc >> PAGE_SHIFT;
    struct page       **papPages;
    struct vm_struct   *pVmArea;
    size_t              iPage;

    pVmArea = __get_vm_area(cbAlloc, VM_ALLOC, MODULES_VADDR, MODULES_END);
    if (!pVmArea)
        return NULL;
    pVmArea->nr_pages = 0;    /* paranoia? */
    pVmArea->pages    = NULL; /* paranoia? */

    papPages = (struct page **)kmalloc(cPages * sizeof(papPages[0]), GFP_KERNEL | __GFP_NOWARN);
    if (!papPages)
    {
        vunmap(pVmArea->addr);
        return NULL;
    }

    for (iPage = 0; iPage < cPages; iPage++)
    {
        papPages[iPage] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN);
        if (!papPages[iPage])
            break;
    }
    if (iPage == cPages)
    {
        /*
         * Map the pages.
         *
         * Not entirely sure we really need to set nr_pages and pages here, but
         * they provide a very convenient place for storing something we need
         * in the free function, if nothing else...
         */
# if RTLNX_VER_MAX(3,17,0)
        struct page **papPagesIterator = papPages;
# endif
        pVmArea->nr_pages = cPages;
        pVmArea->pages    = papPages;
        if (!map_vm_area(pVmArea, PAGE_KERNEL_EXEC,
# if RTLNX_VER_MAX(3,17,0)
                         &papPagesIterator
# else
                         papPages
# endif
                         ))
        {
            PRTMEMLNXHDREX pHdrEx = (PRTMEMLNXHDREX)pVmArea->addr;
            pHdrEx->pVmArea = pVmArea;
            pHdrEx->pvDummy = NULL;
            return &pHdrEx->Hdr;
        }
        /* bail out */
# if RTLNX_VER_MAX(3,17,0)
        pVmArea->nr_pages = papPagesIterator - papPages;
# endif
    }

    vunmap(pVmArea->addr);

    while (iPage-- > 0)
        __free_page(papPages[iPage]);
    kfree(papPages);

    return NULL;
}
#endif /* RTMEMALLOC_EXEC_VM_AREA */
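
/*
 * Layout sketch (added for illustration; not part of the original file): the
 * extended header sits at the start of the vm_struct mapping and RTMEMHDR is
 * its last member, so the user data directly follows the header returned to
 * the generic code:
 *
 *   pVmArea->addr --> +-------------------+
 *                     | pVmArea, pvDummy  |  RTMEMLNXHDREX bookkeeping
 *                     | Hdr (RTMEMHDR)    |  <-- &pHdrEx->Hdr returned above
 *                     +-------------------+
 *                     | user data         |  cb bytes, total rounded to PAGE_SIZE
 *                     +-------------------+
 */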


/**
 * OS specific allocation function.
 */
DECLHIDDEN(int) rtR0MemAllocEx(size_t cb, uint32_t fFlags, PRTMEMHDR *ppHdr)
{
    PRTMEMHDR pHdr;
    IPRT_LINUX_SAVE_EFL_AC();

    /*
     * Allocate.
     */
    if (fFlags & RTMEMHDR_FLAG_EXEC)
    {
        if (fFlags & RTMEMHDR_FLAG_ANY_CTX)
            return VERR_NOT_SUPPORTED;

#if defined(RT_ARCH_AMD64)
# ifdef RTMEMALLOC_EXEC_HEAP
        if (g_HeapExec != NIL_RTHEAPSIMPLE)
        {
            RTSpinlockAcquire(g_HeapExecSpinlock);
            pHdr = (PRTMEMHDR)RTHeapSimpleAlloc(g_HeapExec, cb + sizeof(*pHdr), 0);
            RTSpinlockRelease(g_HeapExecSpinlock);
            fFlags |= RTMEMHDR_FLAG_EXEC_HEAP;
        }
        else
            pHdr = NULL;

# elif defined(RTMEMALLOC_EXEC_VM_AREA)
        pHdr = rtR0MemAllocExecVmArea(cb);
        fFlags |= RTMEMHDR_FLAG_EXEC_VM_AREA;

# else /* !RTMEMALLOC_EXEC_HEAP */
#  error "you do not want to go here..."
        pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, MY_PAGE_KERNEL_EXEC);
# endif /* !RTMEMALLOC_EXEC_HEAP */

#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
        pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, MY_PAGE_KERNEL_EXEC);
#else
        pHdr = (PRTMEMHDR)vmalloc(cb + sizeof(*pHdr));
#endif
    }
    else
    {
        if (
#if 1 /* vmalloc has serious performance issues, avoid it. */
               cb <= PAGE_SIZE*16 - sizeof(*pHdr)
#else
               cb <= PAGE_SIZE
#endif
            || (fFlags & RTMEMHDR_FLAG_ANY_CTX)
           )
        {
            fFlags |= RTMEMHDR_FLAG_KMALLOC;
            pHdr = kmalloc(cb + sizeof(*pHdr),
                           (fFlags & RTMEMHDR_FLAG_ANY_CTX_ALLOC) ? (GFP_ATOMIC | __GFP_NOWARN)
                                                                  : (GFP_KERNEL | __GFP_NOWARN));
            if (RT_UNLIKELY(   !pHdr
                            && cb > PAGE_SIZE
                            && !(fFlags & RTMEMHDR_FLAG_ANY_CTX) ))
            {
                fFlags &= ~RTMEMHDR_FLAG_KMALLOC;
                pHdr = vmalloc(cb + sizeof(*pHdr));
            }
        }
        else
            pHdr = vmalloc(cb + sizeof(*pHdr));
    }
    if (RT_UNLIKELY(!pHdr))
    {
        IPRT_LINUX_RESTORE_EFL_AC();
        return VERR_NO_MEMORY;
    }

    /*
     * Initialize.
     */
    pHdr->u32Magic = RTMEMHDR_MAGIC;
    pHdr->fFlags   = fFlags;
    pHdr->cb       = cb;
    pHdr->cbReq    = cb;

    *ppHdr = pHdr;
    IPRT_LINUX_RESTORE_EFL_AC();
    return VINF_SUCCESS;
}
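
/*
 * Usage sketch (added for illustration; not part of the original file): on
 * Linux the public IPRT front ends such as RTMemAlloc()/RTMemFree() end up
 * in rtR0MemAllocEx()/rtR0MemFree(), so the threshold above decides between
 * kmalloc and vmalloc for the caller.  The sizes are made up for the example.
 */
#if 0 /* example only */
static void rtR0MemAllocExampleUsage(void)
{
    void *pvSmall = RTMemAlloc(256);        /* well below 16 pages: kmalloc path */
    void *pvLarge = RTMemAlloc(512 * _1K);  /* above the threshold: vmalloc path */
    RTMemFree(pvLarge);
    RTMemFree(pvSmall);
}
#endif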


/**
 * OS specific free function.
 */
DECLHIDDEN(void) rtR0MemFree(PRTMEMHDR pHdr)
{
    IPRT_LINUX_SAVE_EFL_AC();

    pHdr->u32Magic += 1;
    if (pHdr->fFlags & RTMEMHDR_FLAG_KMALLOC)
        kfree(pHdr);
#ifdef RTMEMALLOC_EXEC_HEAP
    else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_HEAP)
    {
        RTSpinlockAcquire(g_HeapExecSpinlock);
        RTHeapSimpleFree(g_HeapExec, pHdr);
        RTSpinlockRelease(g_HeapExecSpinlock);
    }
#endif
#ifdef RTMEMALLOC_EXEC_VM_AREA
    else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_VM_AREA)
    {
        PRTMEMLNXHDREX pHdrEx    = RT_FROM_MEMBER(pHdr, RTMEMLNXHDREX, Hdr);
        size_t         iPage     = pHdrEx->pVmArea->nr_pages;
        struct page  **papPages  = pHdrEx->pVmArea->pages;
        void          *pvMapping = pHdrEx->pVmArea->addr;

        vunmap(pvMapping);

        while (iPage-- > 0)
            __free_page(papPages[iPage]);
        kfree(papPages);
    }
#endif
    else
        vfree(pHdr);

    IPRT_LINUX_RESTORE_EFL_AC();
}


/**
 * Compute order. Some functions allocate 2^order pages.
 *
 * @returns order.
 * @param   cPages      Number of pages.
 */
static int CalcPowerOf2Order(unsigned long cPages)
{
    int           iOrder;
    unsigned long cTmp;

    for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
        ;
    if (cPages & ~(1 << iOrder))
        ++iOrder;

    return iOrder;
}
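
/*
 * Worked example (added for illustration; not part of the original file):
 * how CalcPowerOf2Order() rounds a page count up to the next power of two.
 */
#if 0 /* example only */
static void rtR0CalcOrderExample(void)
{
    Assert(CalcPowerOf2Order(1) == 0);  /* 1 page  -> 2^0 = 1 page  */
    Assert(CalcPowerOf2Order(2) == 1);  /* 2 pages -> 2^1 = 2 pages */
    Assert(CalcPowerOf2Order(3) == 2);  /* 3 pages round up to 2^2 = 4 pages */
    Assert(CalcPowerOf2Order(4) == 2);  /* exact power of two stays put */
}
#endif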


/**
 * Allocates physically contiguous memory (below 4GB).
 * The allocation is page aligned and the content is undefined.
 *
 * @returns Pointer to the memory block. This is page aligned.
 * @param   pPhys   Where to store the physical address.
 * @param   cb      The allocation size in bytes. This is always
 *                  rounded up to PAGE_SIZE.
 */
RTR0DECL(void *) RTMemContAlloc(PRTCCPHYS pPhys, size_t cb)
{
    int             cOrder;
    unsigned        cPages;
    struct page    *paPages;
    void           *pvRet;
    IPRT_LINUX_SAVE_EFL_AC();

    /*
     * Validate input.
     */
    Assert(VALID_PTR(pPhys));
    Assert(cb > 0);

    /*
     * Allocate page pointer array.
     */
    cb = RT_ALIGN_Z(cb, PAGE_SIZE);
    cPages = cb >> PAGE_SHIFT;
    cOrder = CalcPowerOf2Order(cPages);
#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
    /* ZONE_DMA32: 0-4GB */
    paPages = alloc_pages(GFP_DMA32 | __GFP_NOWARN, cOrder);
    if (!paPages)
#endif
#ifdef RT_ARCH_AMD64
        /* ZONE_DMA: 0-16MB */
        paPages = alloc_pages(GFP_DMA | __GFP_NOWARN, cOrder);
#else
        /* ZONE_NORMAL: 0-896MB */
        paPages = alloc_pages(GFP_USER | __GFP_NOWARN, cOrder);
#endif
    if (paPages)
    {
        /*
         * Reserve the pages.
         */
        unsigned iPage;
        for (iPage = 0; iPage < cPages; iPage++)
        {
            Assert(!PageHighMem(&paPages[iPage]));
            if (iPage + 1 < cPages)
            {
                AssertMsg(   (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage])) + PAGE_SIZE
                          == (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage + 1]))
                          &&    page_to_phys(&paPages[iPage]) + PAGE_SIZE
                             == page_to_phys(&paPages[iPage + 1]),
                          ("iPage=%i cPages=%u [0]=%#llx,%p [1]=%#llx,%p\n", iPage, cPages,
                           (long long)page_to_phys(&paPages[iPage]), phys_to_virt(page_to_phys(&paPages[iPage])),
                           (long long)page_to_phys(&paPages[iPage + 1]), phys_to_virt(page_to_phys(&paPages[iPage + 1])) ));
            }

            SetPageReserved(&paPages[iPage]);
        }
        *pPhys = page_to_phys(paPages);
        pvRet = phys_to_virt(page_to_phys(paPages));
    }
    else
        pvRet = NULL;

    IPRT_LINUX_RESTORE_EFL_AC();
    return pvRet;
}
RT_EXPORT_SYMBOL(RTMemContAlloc);


/**
 * Frees memory allocated using RTMemContAlloc().
 *
 * @param   pv      Pointer returned by RTMemContAlloc().
 * @param   cb      The cb parameter passed to RTMemContAlloc().
 */
RTR0DECL(void) RTMemContFree(void *pv, size_t cb)
{
    if (pv)
    {
        int           cOrder;
        unsigned      cPages;
        unsigned      iPage;
        struct page  *paPages;
        IPRT_LINUX_SAVE_EFL_AC();

        /* validate */
        AssertMsg(!((uintptr_t)pv & PAGE_OFFSET_MASK), ("pv=%p\n", pv));
        Assert(cb > 0);

        /* calc order and get pages */
        cb = RT_ALIGN_Z(cb, PAGE_SIZE);
        cPages = cb >> PAGE_SHIFT;
        cOrder = CalcPowerOf2Order(cPages);
        paPages = virt_to_page(pv);

        /*
         * Restore page attributes and free the pages.
         */
        for (iPage = 0; iPage < cPages; iPage++)
        {
            ClearPageReserved(&paPages[iPage]);
        }
        __free_pages(paPages, cOrder);
        IPRT_LINUX_RESTORE_EFL_AC();
    }
}
RT_EXPORT_SYMBOL(RTMemContFree);

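/*
 * Usage sketch (added for illustration; not part of the original file):
 * allocating and freeing a physically contiguous buffer with the pair of
 * APIs above.  The 64K size is made up; cb must match in both calls.
 */
#if 0 /* example only */
static void rtR0MemContExampleUsage(void)
{
    RTCCPHYS PhysAddr;
    void    *pv = RTMemContAlloc(&PhysAddr, 64 * _1K);
    if (pv)
    {
        /* ... hand PhysAddr to a device for DMA, access it via pv ... */
        RTMemContFree(pv, 64 * _1K);
    }
}
#endif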