VirtualBox

Ticket #10709: memobj-r0drv-linux.c

File memobj-r0drv-linux.c, 51.3 KB (added by Andre Robatino, 12 years ago)

/usr/src/vboxhost-4.1.18/vboxdrv/r0drv/linux/memobj-r0drv-linux.c from F17 x86_64 host

1/* $Revision: 75790 $ */
2/** @file
3 * IPRT - Ring-0 Memory Objects, Linux.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "the-linux-kernel.h"
32
33#include <iprt/memobj.h>
34#include <iprt/alloc.h>
35#include <iprt/assert.h>
36#include <iprt/log.h>
37#include <iprt/process.h>
38#include <iprt/string.h>
39#include "internal/memobj.h"
40
41
42/*******************************************************************************
43* Defined Constants And Macros *
44*******************************************************************************/
45/* early 2.6 kernels */
46#ifndef PAGE_SHARED_EXEC
47# define PAGE_SHARED_EXEC PAGE_SHARED
48#endif
49#ifndef PAGE_READONLY_EXEC
50# define PAGE_READONLY_EXEC PAGE_READONLY
51#endif
52
53/*
54 * 2.6.29+ kernels don't work with remap_pfn_range() anymore because
55 * track_pfn_vma_new() is apparently not defined for non-RAM pages.
56 * It should be safe to use vm_insert_page() older kernels as well.
57 */
58#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
59# define VBOX_USE_INSERT_PAGE
60#endif
61#if defined(CONFIG_X86_PAE) \
62 && ( defined(HAVE_26_STYLE_REMAP_PAGE_RANGE) \
63 || ( LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) \
64 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)))
65# define VBOX_USE_PAE_HACK
66#endif
67
68
69/*******************************************************************************
70* Structures and Typedefs *
71*******************************************************************************/
72/**
73 * The Linux version of the memory object structure.
74 */
75typedef struct RTR0MEMOBJLNX
76{
77 /** The core structure. */
78 RTR0MEMOBJINTERNAL Core;
79 /** Set if the allocation is contiguous.
80 * This means it has to be given back as one chunk. */
81 bool fContiguous;
82 /** Set if we've vmap'ed the memory into ring-0. */
83 bool fMappedToRing0;
84 /** The number of pages in the apPages array. */
85 size_t cPages;
86 /** Array of struct page pointers. (variable size) */
87 struct page *apPages[1];
88} RTR0MEMOBJLNX, *PRTR0MEMOBJLNX;
89
90
91static void rtR0MemObjLinuxFreePages(PRTR0MEMOBJLNX pMemLnx);
92
93
94/**
95 * Helper that converts from a RTR0PROCESS handle to a linux task.
96 *
97 * @returns The corresponding Linux task.
98 * @param R0Process IPRT ring-0 process handle.
99 */
100static struct task_struct *rtR0ProcessToLinuxTask(RTR0PROCESS R0Process)
101{
102 /** @todo fix rtR0ProcessToLinuxTask!! */
103 /** @todo many (all?) callers currently assume that we return 'current'! */
104 return R0Process == RTR0ProcHandleSelf() ? current : NULL;
105}
106
107
108/**
109 * Compute order. Some functions allocate 2^order pages.
110 *
111 * @returns order.
112 * @param cPages Number of pages.
113 */
114static int rtR0MemObjLinuxOrder(size_t cPages)
115{
116 int iOrder;
117 size_t cTmp;
118
119 for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
120 ;
121 if (cPages & ~((size_t)1 << iOrder))
122 ++iOrder;
123
124 return iOrder;
125}
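/*
 * For reference: rtR0MemObjLinuxOrder(1) = 0, (2) = 1, (3) = 2, (4) = 2 and
 * (5..8) = 3, i.e. the smallest order such that 2^order >= cPages, matching
 * the granularity alloc_pages() works with.
 */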
126
127
128/**
129 * Converts from RTMEM_PROT_* to Linux PAGE_*.
130 *
131 * @returns Linux page protection constant.
132 * @param fProt The IPRT protection mask.
133 * @param fKernel Whether it applies to kernel or user space.
134 */
135static pgprot_t rtR0MemObjLinuxConvertProt(unsigned fProt, bool fKernel)
136{
137 switch (fProt)
138 {
139 default:
140 AssertMsgFailed(("%#x %d\n", fProt, fKernel));
141 case RTMEM_PROT_NONE:
142 return PAGE_NONE;
143
144 case RTMEM_PROT_READ:
145 return fKernel ? PAGE_KERNEL_RO : PAGE_READONLY;
146
147 case RTMEM_PROT_WRITE:
148 case RTMEM_PROT_WRITE | RTMEM_PROT_READ:
149 return fKernel ? PAGE_KERNEL : PAGE_SHARED;
150
151 case RTMEM_PROT_EXEC:
152 case RTMEM_PROT_EXEC | RTMEM_PROT_READ:
153#if defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
154 if (fKernel)
155 {
156 pgprot_t fPg = MY_PAGE_KERNEL_EXEC;
157 pgprot_val(fPg) &= ~_PAGE_RW;
158 return fPg;
159 }
160 return PAGE_READONLY_EXEC;
161#else
162 return fKernel ? MY_PAGE_KERNEL_EXEC : PAGE_READONLY_EXEC;
163#endif
164
165 case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC:
166 case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_READ:
167 return fKernel ? MY_PAGE_KERNEL_EXEC : PAGE_SHARED_EXEC;
168 }
169}
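/*
 * Note that plain RTMEM_PROT_WRITE is mapped to read+write above (PAGE_KERNEL /
 * PAGE_SHARED); the PAGE_* protections have no write-only variant, so that is
 * the closest match the page tables can express.
 */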
170
171
172/**
173 * Worker for rtR0MemObjNativeReserveUser and rtR0MemObjNativeMapUser that creates
174 * an empty user space mapping.
175 *
176 * We acquire the mmap_sem of the task!
177 *
178 * @returns Pointer to the mapping.
179 * (void *)-1 on failure.
180 * @param R3PtrFixed (RTR3PTR)-1 if anywhere, otherwise a specific location.
181 * @param cb The size of the mapping.
182 * @param uAlignment The alignment of the mapping.
183 * @param pTask The Linux task to create this mapping in.
184 * @param fProt The RTMEM_PROT_* mask.
185 */
186static void *rtR0MemObjLinuxDoMmap(RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, struct task_struct *pTask, unsigned fProt)
187{
188 unsigned fLnxProt;
189 unsigned long ulAddr;
190
191 Assert((pTask == current)); /* do_mmap */
192
193 /*
194 * Convert from IPRT protection to mman.h PROT_ and call do_mmap.
195 */
196 fProt &= (RTMEM_PROT_NONE | RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC);
197 if (fProt == RTMEM_PROT_NONE)
198 fLnxProt = PROT_NONE;
199 else
200 {
201 fLnxProt = 0;
202 if (fProt & RTMEM_PROT_READ)
203 fLnxProt |= PROT_READ;
204 if (fProt & RTMEM_PROT_WRITE)
205 fLnxProt |= PROT_WRITE;
206 if (fProt & RTMEM_PROT_EXEC)
207 fLnxProt |= PROT_EXEC;
208 }
209
210 if (R3PtrFixed != (RTR3PTR)-1)
211 {
212#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
213 ulAddr = vm_mmap(NULL, R3PtrFixed, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, 0);
214#else
215 down_write(&pTask->mm->mmap_sem);
216 ulAddr = do_mmap(NULL, R3PtrFixed, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, 0);
217 up_write(&pTask->mm->mmap_sem);
218#endif
219 }
220 else
221 {
222#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
223 ulAddr = vm_mmap(NULL, 0, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS, 0);
224#else
225 down_write(&pTask->mm->mmap_sem);
226 ulAddr = do_mmap(NULL, 0, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS, 0);
227 up_write(&pTask->mm->mmap_sem);
228#endif
229 if ( !(ulAddr & ~PAGE_MASK)
230 && (ulAddr & (uAlignment - 1)))
231 {
232 /** @todo implement uAlignment properly... We'll probably need to make some dummy mappings to fill
233 * up alignment gaps. This is of course complicated by fragmentation (which we might have caused
234 * ourselves) and further by there being two mmap strategies (top / bottom). */
235 /* For now, just ignore uAlignment requirements... */
236 }
237 }
238
239
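    /*
     * On success do_mmap() / vm_mmap() return a page aligned address; failures are
     * small negative errno values cast to unsigned long and thus have bits set below
     * PAGE_SHIFT, so the page-offset check below doubles as the error test.
     */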
240 if (ulAddr & ~PAGE_MASK) /* ~PAGE_MASK == PAGE_OFFSET_MASK */
241 return (void *)-1;
242 return (void *)ulAddr;
243}
244
245
246/**
247 * Worker that destroys a user space mapping.
248 * Undoes what rtR0MemObjLinuxDoMmap did.
249 *
250 * We acquire the mmap_sem of the task!
251 *
252 * @param pv The ring-3 mapping.
253 * @param cb The size of the mapping.
254 * @param pTask The Linux task to destroy this mapping in.
255 */
256static void rtR0MemObjLinuxDoMunmap(void *pv, size_t cb, struct task_struct *pTask)
257{
258#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
259 Assert(pTask == current);
260 vm_munmap((unsigned long)pv, cb);
261#elif defined(USE_RHEL4_MUNMAP)
262 down_write(&pTask->mm->mmap_sem);
263 do_munmap(pTask->mm, (unsigned long)pv, cb, 0); /* should it be 1 or 0? */
264 up_write(&pTask->mm->mmap_sem);
265#else
266 down_write(&pTask->mm->mmap_sem);
267 do_munmap(pTask->mm, (unsigned long)pv, cb);
268 up_write(&pTask->mm->mmap_sem);
269#endif
270}
271
272
273/**
274 * Internal worker that allocates physical pages and creates the memory object for them.
275 *
276 * @returns IPRT status code.
277 * @param ppMemLnx Where to store the memory object pointer.
278 * @param enmType The object type.
279 * @param cb The number of bytes to allocate.
280 * @param uAlignment The alignment of the physical memory.
281 * Only valid if fContiguous == true, ignored otherwise.
282 * @param fFlagsLnx The page allocation flags (GFPs).
283 * @param fContiguous Whether the allocation must be contiguous.
284 */
285static int rtR0MemObjLinuxAllocPages(PRTR0MEMOBJLNX *ppMemLnx, RTR0MEMOBJTYPE enmType, size_t cb,
286 size_t uAlignment, unsigned fFlagsLnx, bool fContiguous)
287{
288 size_t iPage;
289 size_t const cPages = cb >> PAGE_SHIFT;
290 struct page *paPages;
291
292 /*
293 * Allocate a memory object structure that's large enough to contain
294 * the page pointer array.
295 */
296 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), enmType, NULL, cb);
297 if (!pMemLnx)
298 return VERR_NO_MEMORY;
299 pMemLnx->cPages = cPages;
300
301 if (cPages > 255)
302 {
303# ifdef __GFP_REPEAT
304 /* Try hard to allocate the memory, but the allocation attempt might fail. */
305 fFlagsLnx |= __GFP_REPEAT;
306# endif
307# ifdef __GFP_NOMEMALLOC
308 /* Introduced with Linux 2.6.12: Don't use emergency reserves */
309 fFlagsLnx |= __GFP_NOMEMALLOC;
310# endif
311 }
312
313 /*
314 * Allocate the pages.
315 * For small allocations we'll try contiguous first and then fall back on page by page.
316 */
317#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
318 if ( fContiguous
319 || cb <= PAGE_SIZE * 2)
320 {
321# ifdef VBOX_USE_INSERT_PAGE
322 paPages = alloc_pages(fFlagsLnx | __GFP_COMP, rtR0MemObjLinuxOrder(cPages));
323# else
324 paPages = alloc_pages(fFlagsLnx, rtR0MemObjLinuxOrder(cPages));
325# endif
326 if (paPages)
327 {
328 fContiguous = true;
329 for (iPage = 0; iPage < cPages; iPage++)
330 pMemLnx->apPages[iPage] = &paPages[iPage];
331 }
332 else if (fContiguous)
333 {
334 rtR0MemObjDelete(&pMemLnx->Core);
335 return VERR_NO_MEMORY;
336 }
337 }
338
339 if (!fContiguous)
340 {
341 for (iPage = 0; iPage < cPages; iPage++)
342 {
343 pMemLnx->apPages[iPage] = alloc_page(fFlagsLnx);
344 if (RT_UNLIKELY(!pMemLnx->apPages[iPage]))
345 {
346 while (iPage-- > 0)
347 __free_page(pMemLnx->apPages[iPage]);
348 rtR0MemObjDelete(&pMemLnx->Core);
349 return VERR_NO_MEMORY;
350 }
351 }
352 }
353
354#else /* < 2.4.22 */
355 /** @todo figure out why we didn't allocate page-by-page on 2.4.21 and older... */
356 paPages = alloc_pages(fFlagsLnx, rtR0MemObjLinuxOrder(cPages));
357 if (!paPages)
358 {
359 rtR0MemObjDelete(&pMemLnx->Core);
360 return VERR_NO_MEMORY;
361 }
362 for (iPage = 0; iPage < cPages; iPage++)
363 {
364 pMemLnx->apPages[iPage] = &paPages[iPage];
365 MY_SET_PAGES_EXEC(pMemLnx->apPages[iPage], 1);
366 if (PageHighMem(pMemLnx->apPages[iPage]))
367 BUG();
368 }
369
370 fContiguous = true;
371#endif /* < 2.4.22 */
372 pMemLnx->fContiguous = fContiguous;
373
374 /*
375 * Reserve the pages.
376 */
377 for (iPage = 0; iPage < cPages; iPage++)
378 SetPageReserved(pMemLnx->apPages[iPage]);
379
380 /*
381 * Note that the physical address of memory allocated with alloc_pages(flags, order)
382 * is always 2^(PAGE_SHIFT+order)-aligned.
383 */
384 if ( fContiguous
385 && uAlignment > PAGE_SIZE)
386 {
387 /*
388 * Check for alignment constraints; as noted above, alloc_pages(flags, order)
389 * returns 2^(PAGE_SHIFT+order)-aligned memory, so this should always hold.
390 */
391 if (RT_UNLIKELY(page_to_phys(pMemLnx->apPages[0]) & (uAlignment - 1)))
392 {
393 /*
394 * This should never happen!
395 */
396 printk("rtR0MemObjLinuxAllocPages(cb=0x%lx, uAlignment=0x%lx): alloc_pages(..., %d) returned physical memory at 0x%lx!\n",
397 (unsigned long)cb, (unsigned long)uAlignment, rtR0MemObjLinuxOrder(cPages), (unsigned long)page_to_phys(pMemLnx->apPages[0]));
398 rtR0MemObjLinuxFreePages(pMemLnx);
399 return VERR_NO_MEMORY;
400 }
401 }
402
403 *ppMemLnx = pMemLnx;
404 return VINF_SUCCESS;
405}
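/*
 * Typical invocation (cf. rtR0MemObjNativeAllocPage() below):
 *     rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb,
 *                                    PAGE_SIZE, GFP_HIGHUSER, false);
 * The contiguous and zone-restricted flavours used elsewhere in this file only
 * differ in the fFlagsLnx and fContiguous arguments.
 */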
406
407
408/**
409 * Frees the physical pages allocated by the rtR0MemObjLinuxAllocPages() call.
410 *
411 * This method does NOT free the object.
412 *
413 * @param pMemLnx The object whose physical pages should be freed.
414 */
415static void rtR0MemObjLinuxFreePages(PRTR0MEMOBJLNX pMemLnx)
416{
417 size_t iPage = pMemLnx->cPages;
418 if (iPage > 0)
419 {
420 /*
421 * Restore the page flags.
422 */
423 while (iPage-- > 0)
424 {
425 ClearPageReserved(pMemLnx->apPages[iPage]);
426#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
427#else
428 MY_SET_PAGES_NOEXEC(pMemLnx->apPages[iPage], 1);
429#endif
430 }
431
432 /*
433 * Free the pages.
434 */
435#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
436 if (!pMemLnx->fContiguous)
437 {
438 iPage = pMemLnx->cPages;
439 while (iPage-- > 0)
440 __free_page(pMemLnx->apPages[iPage]);
441 }
442 else
443#endif
444 __free_pages(pMemLnx->apPages[0], rtR0MemObjLinuxOrder(pMemLnx->cPages));
445
446 pMemLnx->cPages = 0;
447 }
448}
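/*
 * This undoes rtR0MemObjLinuxAllocPages(): the reserved bit set there is cleared
 * (and, on pre-2.4.22 kernels, the exec attribute restored) before the pages are
 * handed back to the page allocator.
 */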
449
450
451/**
452 * Maps the allocation into ring-0.
453 *
454 * This will update the RTR0MEMOBJLNX::Core.pv and RTR0MEMOBJ::fMappedToRing0 members.
455 *
456 * Contiguous mappings that aren't in 'high' memory will already be mapped into kernel
457 * space, so we'll use that mapping if possible. If execute access is required, we'll
458 * play safe and do our own mapping.
459 *
460 * @returns IPRT status code.
461 * @param pMemLnx The linux memory object to map.
462 * @param fExecutable Whether execute access is required.
463 */
464static int rtR0MemObjLinuxVMap(PRTR0MEMOBJLNX pMemLnx, bool fExecutable)
465{
466 int rc = VINF_SUCCESS;
467
468 /*
469 * Choose mapping strategy.
470 */
471 bool fMustMap = fExecutable
472 || !pMemLnx->fContiguous;
473 if (!fMustMap)
474 {
475 size_t iPage = pMemLnx->cPages;
476 while (iPage-- > 0)
477 if (PageHighMem(pMemLnx->apPages[iPage]))
478 {
479 fMustMap = true;
480 break;
481 }
482 }
483
484 Assert(!pMemLnx->Core.pv);
485 Assert(!pMemLnx->fMappedToRing0);
486
487 if (fMustMap)
488 {
489 /*
490 * Use vmap - 2.4.22 and later.
491 */
492#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
493 pgprot_t fPg;
494 pgprot_val(fPg) = _PAGE_PRESENT | _PAGE_RW;
495# ifdef _PAGE_NX
496 if (!fExecutable)
497 pgprot_val(fPg) |= _PAGE_NX;
498# endif
499
500# ifdef VM_MAP
501 pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_MAP, fPg);
502# else
503 pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_ALLOC, fPg);
504# endif
505 if (pMemLnx->Core.pv)
506 pMemLnx->fMappedToRing0 = true;
507 else
508 rc = VERR_MAP_FAILED;
509#else /* < 2.4.22 */
510 rc = VERR_NOT_SUPPORTED;
511#endif
512 }
513 else
514 {
515 /*
516 * Use the kernel RAM mapping.
517 */
518 pMemLnx->Core.pv = phys_to_virt(page_to_phys(pMemLnx->apPages[0]));
519 Assert(pMemLnx->Core.pv);
520 }
521
522 return rc;
523}
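/*
 * The non-vmap branch relies on the kernel's linear low-memory mapping, which only
 * exists for non-highmem pages; the PageHighMem() scan above forces the vmap path
 * whenever that assumption would not hold, so phys_to_virt() is safe here.
 */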
524
525
526/**
527 * Undoes what rtR0MemObjLinuxVMap() did.
528 *
529 * @param pMemLnx The linux memory object.
530 */
531static void rtR0MemObjLinuxVUnmap(PRTR0MEMOBJLNX pMemLnx)
532{
533#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
534 if (pMemLnx->fMappedToRing0)
535 {
536 Assert(pMemLnx->Core.pv);
537 vunmap(pMemLnx->Core.pv);
538 pMemLnx->fMappedToRing0 = false;
539 }
540#else /* < 2.4.22 */
541 Assert(!pMemLnx->fMappedToRing0);
542#endif
543 pMemLnx->Core.pv = NULL;
544}
545
546
547DECLHIDDEN(int) rtR0MemObjNativeFree(RTR0MEMOBJ pMem)
548{
549 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
550
551 /*
552 * Release any memory that we've allocated or locked.
553 */
554 switch (pMemLnx->Core.enmType)
555 {
556 case RTR0MEMOBJTYPE_LOW:
557 case RTR0MEMOBJTYPE_PAGE:
558 case RTR0MEMOBJTYPE_CONT:
559 case RTR0MEMOBJTYPE_PHYS:
560 case RTR0MEMOBJTYPE_PHYS_NC:
561 rtR0MemObjLinuxVUnmap(pMemLnx);
562 rtR0MemObjLinuxFreePages(pMemLnx);
563 break;
564
565 case RTR0MEMOBJTYPE_LOCK:
566 if (pMemLnx->Core.u.Lock.R0Process != NIL_RTR0PROCESS)
567 {
568 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
569 size_t iPage;
570 Assert(pTask);
571 if (pTask && pTask->mm)
572 down_read(&pTask->mm->mmap_sem);
573
574 iPage = pMemLnx->cPages;
575 while (iPage-- > 0)
576 {
577 if (!PageReserved(pMemLnx->apPages[iPage]))
578 SetPageDirty(pMemLnx->apPages[iPage]);
579 page_cache_release(pMemLnx->apPages[iPage]);
580 }
581
582 if (pTask && pTask->mm)
583 up_read(&pTask->mm->mmap_sem);
584 }
585 /* else: kernel memory - nothing to do here. */
586 break;
587
588 case RTR0MEMOBJTYPE_RES_VIRT:
589 Assert(pMemLnx->Core.pv);
590 if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
591 {
592 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
593 Assert(pTask);
594 if (pTask && pTask->mm)
595 rtR0MemObjLinuxDoMunmap(pMemLnx->Core.pv, pMemLnx->Core.cb, pTask);
596 }
597 else
598 {
599 vunmap(pMemLnx->Core.pv);
600
601 Assert(pMemLnx->cPages == 1 && pMemLnx->apPages[0] != NULL);
602 __free_page(pMemLnx->apPages[0]);
603 pMemLnx->apPages[0] = NULL;
604 pMemLnx->cPages = 0;
605 }
606 pMemLnx->Core.pv = NULL;
607 break;
608
609 case RTR0MEMOBJTYPE_MAPPING:
610 Assert(pMemLnx->cPages == 0); Assert(pMemLnx->Core.pv);
611 if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
612 {
613 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
614 Assert(pTask);
615 if (pTask && pTask->mm)
616 rtR0MemObjLinuxDoMunmap(pMemLnx->Core.pv, pMemLnx->Core.cb, pTask);
617 }
618 else
619 vunmap(pMemLnx->Core.pv);
620 pMemLnx->Core.pv = NULL;
621 break;
622
623 default:
624 AssertMsgFailed(("enmType=%d\n", pMemLnx->Core.enmType));
625 return VERR_INTERNAL_ERROR;
626 }
627 return VINF_SUCCESS;
628}
629
630
631DECLHIDDEN(int) rtR0MemObjNativeAllocPage(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
632{
633 PRTR0MEMOBJLNX pMemLnx;
634 int rc;
635
636#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
637 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, PAGE_SIZE, GFP_HIGHUSER, false /* non-contiguous */);
638#else
639 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, PAGE_SIZE, GFP_USER, false /* non-contiguous */);
640#endif
641 if (RT_SUCCESS(rc))
642 {
643 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
644 if (RT_SUCCESS(rc))
645 {
646 *ppMem = &pMemLnx->Core;
647 return rc;
648 }
649
650 rtR0MemObjLinuxFreePages(pMemLnx);
651 rtR0MemObjDelete(&pMemLnx->Core);
652 }
653
654 return rc;
655}
656
657
658DECLHIDDEN(int) rtR0MemObjNativeAllocLow(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
659{
660 PRTR0MEMOBJLNX pMemLnx;
661 int rc;
662
663 /* Try to avoid GFP_DMA. GFP_DMA32 was introduced with Linux 2.6.15. */
664#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
665 /* ZONE_DMA32: 0-4GB */
666 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, PAGE_SIZE, GFP_DMA32, false /* non-contiguous */);
667 if (RT_FAILURE(rc))
668#endif
669#ifdef RT_ARCH_AMD64
670 /* ZONE_DMA: 0-16MB */
671 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, PAGE_SIZE, GFP_DMA, false /* non-contiguous */);
672#else
673# ifdef CONFIG_X86_PAE
674# endif
675 /* ZONE_NORMAL: 0-896MB */
676 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, PAGE_SIZE, GFP_USER, false /* non-contiguous */);
677#endif
678 if (RT_SUCCESS(rc))
679 {
680 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
681 if (RT_SUCCESS(rc))
682 {
683 *ppMem = &pMemLnx->Core;
684 return rc;
685 }
686
687 rtR0MemObjLinuxFreePages(pMemLnx);
688 rtR0MemObjDelete(&pMemLnx->Core);
689 }
690
691 return rc;
692}
693
694
695DECLHIDDEN(int) rtR0MemObjNativeAllocCont(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
696{
697 PRTR0MEMOBJLNX pMemLnx;
698 int rc;
699
700#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
701 /* ZONE_DMA32: 0-4GB */
702 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, PAGE_SIZE, GFP_DMA32, true /* contiguous */);
703 if (RT_FAILURE(rc))
704#endif
705#ifdef RT_ARCH_AMD64
706 /* ZONE_DMA: 0-16MB */
707 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, PAGE_SIZE, GFP_DMA, true /* contiguous */);
708#else
709 /* ZONE_NORMAL (32-bit hosts): 0-896MB */
710 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, PAGE_SIZE, GFP_USER, true /* contiguous */);
711#endif
712 if (RT_SUCCESS(rc))
713 {
714 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
715 if (RT_SUCCESS(rc))
716 {
717#if defined(RT_STRICT) && (defined(RT_ARCH_AMD64) || defined(CONFIG_HIGHMEM64G))
718 size_t iPage = pMemLnx->cPages;
719 while (iPage-- > 0)
720 Assert(page_to_phys(pMemLnx->apPages[iPage]) < _4G);
721#endif
722 pMemLnx->Core.u.Cont.Phys = page_to_phys(pMemLnx->apPages[0]);
723 *ppMem = &pMemLnx->Core;
724 return rc;
725 }
726
727 rtR0MemObjLinuxFreePages(pMemLnx);
728 rtR0MemObjDelete(&pMemLnx->Core);
729 }
730
731 return rc;
732}
733
734
735/**
736 * Worker for rtR0MemObjLinuxAllocPhysSub that tries one allocation strategy.
737 *
738 * @returns IPRT status.
739 * @param ppMem Where to store the memory object pointer on success.
740 * @param enmType The object type.
741 * @param cb The size of the allocation.
742 * @param uAlignment The alignment of the physical memory.
743 * Only valid for fContiguous == true, ignored otherwise.
744 * @param PhysHighest See rtR0MemObjNativeAllocPhys.
745 * @param fGfp The Linux GFP flags to use for the allocation.
746 */
747static int rtR0MemObjLinuxAllocPhysSub2(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType,
748 size_t cb, size_t uAlignment, RTHCPHYS PhysHighest, unsigned fGfp)
749{
750 PRTR0MEMOBJLNX pMemLnx;
751 int rc;
752
753 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, enmType, cb, uAlignment, fGfp,
754 enmType == RTR0MEMOBJTYPE_PHYS /* contiguous / non-contiguous */);
755 if (RT_FAILURE(rc))
756 return rc;
757
758 /*
759 * Check the addresses if necessary. (Can be optimized a bit for PHYS.)
760 */
761 if (PhysHighest != NIL_RTHCPHYS)
762 {
763 size_t iPage = pMemLnx->cPages;
764 while (iPage-- > 0)
765 if (page_to_phys(pMemLnx->apPages[iPage]) >= PhysHighest)
766 {
767 rtR0MemObjLinuxFreePages(pMemLnx);
768 rtR0MemObjDelete(&pMemLnx->Core);
769 return VERR_NO_MEMORY;
770 }
771 }
772
773 /*
774 * Complete the object.
775 */
776 if (enmType == RTR0MEMOBJTYPE_PHYS)
777 {
778 pMemLnx->Core.u.Phys.PhysBase = page_to_phys(pMemLnx->apPages[0]);
779 pMemLnx->Core.u.Phys.fAllocated = true;
780 }
781 *ppMem = &pMemLnx->Core;
782 return rc;
783}
784
785
786/**
787 * Worker for rtR0MemObjNativeAllocPhys and rtR0MemObjNativeAllocPhysNC.
788 *
789 * @returns IPRT status.
790 * @param ppMem Where to store the memory object pointer on success.
791 * @param enmType The object type.
792 * @param cb The size of the allocation.
793 * @param uAlignment The alignment of the physical memory.
794 * Only valid for enmType == RTR0MEMOBJTYPE_PHYS, ignored otherwise.
795 * @param PhysHighest See rtR0MemObjNativeAllocPhys.
796 */
797static int rtR0MemObjLinuxAllocPhysSub(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType,
798 size_t cb, size_t uAlignment, RTHCPHYS PhysHighest)
799{
800 int rc;
801
802 /*
803 * There are two clear cases and that's the <=16MB and anything-goes ones.
804 * When the physical address limit is somewhere in-between those two we'll
805 * just have to try, starting with HIGHUSER and working our way thru the
806 * different types, hoping we'll get lucky.
807 *
808 * We should probably move this physical address restriction logic up to
809 * the page alloc function as it would be more efficient there. But since
810 * we don't expect this to be a performance issue just yet it can wait.
811 */
812 if (PhysHighest == NIL_RTHCPHYS)
813 /* ZONE_HIGHMEM: the whole physical memory */
814 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_HIGHUSER);
815 else if (PhysHighest <= _1M * 16)
816 /* ZONE_DMA: 0-16MB */
817 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_DMA);
818 else
819 {
820 rc = VERR_NO_MEMORY;
821 if (RT_FAILURE(rc))
822 /* ZONE_HIGHMEM: the whole physical memory */
823 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_HIGHUSER);
824 if (RT_FAILURE(rc))
825 /* ZONE_NORMAL: 0-896MB */
826 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_USER);
827#ifdef GFP_DMA32
828 if (RT_FAILURE(rc))
829 /* ZONE_DMA32: 0-4GB */
830 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_DMA32);
831#endif
832 if (RT_FAILURE(rc))
833 /* ZONE_DMA: 0-16MB */
834 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_DMA);
835 }
836 return rc;
837}
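/*
 * In other words, for an in-between PhysHighest the zones are tried from least to
 * most restrictive: HIGHUSER, NORMAL, DMA32 (when available) and finally DMA, with
 * the per-page PhysHighest check in rtR0MemObjLinuxAllocPhysSub2() weeding out
 * allocations that still ended up too high.
 */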
838
839
840/**
841 * Translates a kernel virtual address to a linux page structure by walking the
842 * page tables.
843 *
844 * @note We do assume that the page tables will not change as we are walking
845 * them. This assumption is rather forced by the fact that I could not
846 * immediately see any way of preventing this from happening. So, we
847 * take some extra care when accessing them.
848 *
849 * Because of this, we don't want to use this function on memory where
850 * attribute changes to nearby pages are likely to cause large pages to
851 * be used or split up. So, don't use this for the linear mapping of
852 * physical memory.
853 *
854 * @returns Pointer to the page structure or NULL if it could not be found.
855 * @param pv The kernel virtual address.
856 */
857static struct page *rtR0MemObjLinuxVirtToPage(void *pv)
858{
859 unsigned long ulAddr = (unsigned long)pv;
860 unsigned long pfn;
861 struct page *pPage;
862 pte_t *pEntry;
863 union
864 {
865 pgd_t Global;
866#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
867 pud_t Upper;
868#endif
869 pmd_t Middle;
870 pte_t Entry;
871 } u;
872
873 /* Should this happen in a situation this code will be called in? And if
874 * so, can it change under our feet? See also
875 * "Documentation/vm/active_mm.txt" in the kernel sources. */
876 if (RT_UNLIKELY(!current->active_mm))
877 return NULL;
878 u.Global = *pgd_offset(current->active_mm, ulAddr);
879 if (RT_UNLIKELY(pgd_none(u.Global)))
880 return NULL;
881
882#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
883 u.Upper = *pud_offset(&u.Global, ulAddr);
884 if (RT_UNLIKELY(pud_none(u.Upper)))
885 return NULL;
886# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25)
887 if (pud_large(u.Upper))
888 {
889 pPage = pud_page(u.Upper);
890 AssertReturn(pPage, NULL);
891 pfn = page_to_pfn(pPage); /* doing the safe way... */
892 pfn += (ulAddr >> PAGE_SHIFT) & ((UINT32_C(1) << (PUD_SHIFT - PAGE_SHIFT)) - 1);
893 return pfn_to_page(pfn);
894 }
895# endif
896
897 u.Middle = *pmd_offset(&u.Upper, ulAddr);
898#else /* < 2.6.11 */
899 u.Middle = *pmd_offset(&u.Global, ulAddr);
900#endif /* < 2.6.11 */
901 if (RT_UNLIKELY(pmd_none(u.Middle)))
902 return NULL;
903#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
904 if (pmd_large(u.Middle))
905 {
906 pPage = pmd_page(u.Middle);
907 AssertReturn(pPage, NULL);
908 pfn = page_to_pfn(pPage); /* doing the safe way... */
909 pfn += (ulAddr >> PAGE_SHIFT) & ((UINT32_C(1) << (PMD_SHIFT - PAGE_SHIFT)) - 1);
910 return pfn_to_page(pfn);
911 }
912#endif
913
914/* As usual, RHEL 3 had pte_offset_map earlier. */
915#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 5) || defined(pte_offset_map)
916 pEntry = pte_offset_map(&u.Middle, ulAddr);
917#else
918 pEntry = pte_offset(&u.Middle, ulAddr);
919#endif
920 if (RT_UNLIKELY(!pEntry))
921 return NULL;
922 u.Entry = *pEntry;
923#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 5) || defined(pte_offset_map)
924 pte_unmap(pEntry);
925#endif
926
927 if (RT_UNLIKELY(!pte_present(u.Entry)))
928 return NULL;
929 return pte_page(u.Entry);
930}
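/*
 * The walk above follows the generic pgd -> pud -> pmd -> pte chain (the pud level
 * only exists on 2.6.11+ here) and takes a shortcut for large pud/pmd entries by
 * deriving the page from the huge page's first page plus the offset encoded in the
 * virtual address, rather than descending further.
 */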
931
932
933DECLHIDDEN(int) rtR0MemObjNativeAllocPhys(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest, size_t uAlignment)
934{
935 return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS, cb, uAlignment, PhysHighest);
936}
937
938
939DECLHIDDEN(int) rtR0MemObjNativeAllocPhysNC(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest)
940{
941 return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS_NC, cb, PAGE_SIZE, PhysHighest);
942}
943
944
945DECLHIDDEN(int) rtR0MemObjNativeEnterPhys(PPRTR0MEMOBJINTERNAL ppMem, RTHCPHYS Phys, size_t cb, uint32_t uCachePolicy)
946{
947 /*
948 * All we need to do here is to validate that we can use
949 * ioremap on the specified address (32/64-bit dma_addr_t).
950 */
951 PRTR0MEMOBJLNX pMemLnx;
952 dma_addr_t PhysAddr = Phys;
953 AssertMsgReturn(PhysAddr == Phys, ("%#llx\n", (unsigned long long)Phys), VERR_ADDRESS_TOO_BIG);
954
955 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_PHYS, NULL, cb);
956 if (!pMemLnx)
957 return VERR_NO_MEMORY;
958
959 pMemLnx->Core.u.Phys.PhysBase = PhysAddr;
960 pMemLnx->Core.u.Phys.fAllocated = false;
961 pMemLnx->Core.u.Phys.uCachePolicy = uCachePolicy;
962 Assert(!pMemLnx->cPages);
963 *ppMem = &pMemLnx->Core;
964 return VINF_SUCCESS;
965}
966
967
968DECLHIDDEN(int) rtR0MemObjNativeLockUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3Ptr, size_t cb, uint32_t fAccess, RTR0PROCESS R0Process)
969{
970 const int cPages = cb >> PAGE_SHIFT;
971 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
972 struct vm_area_struct **papVMAs;
973 PRTR0MEMOBJLNX pMemLnx;
974 int rc = VERR_NO_MEMORY;
975 NOREF(fAccess);
976
977 /*
978 * Check for valid task and size overflows.
979 */
980 if (!pTask)
981 return VERR_NOT_SUPPORTED;
982 if (((size_t)cPages << PAGE_SHIFT) != cb)
983 return VERR_OUT_OF_RANGE;
984
985 /*
986 * Allocate the memory object and a temporary buffer for the VMAs.
987 */
988 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), RTR0MEMOBJTYPE_LOCK, (void *)R3Ptr, cb);
989 if (!pMemLnx)
990 return VERR_NO_MEMORY;
991
992 papVMAs = (struct vm_area_struct **)RTMemAlloc(sizeof(*papVMAs) * cPages);
993 if (papVMAs)
994 {
995 down_read(&pTask->mm->mmap_sem);
996
997 /*
998 * Get user pages.
999 */
1000 rc = get_user_pages(pTask, /* Task for fault accounting. */
1001 pTask->mm, /* Whose pages. */
1002 R3Ptr, /* Where from. */
1003 cPages, /* How many pages. */
1004 1, /* Write to memory. */
1005 0, /* force. */
1006 &pMemLnx->apPages[0], /* Page array. */
1007 papVMAs); /* vmas */
1008 if (rc == cPages)
1009 {
1010 /*
1011 * Flush dcache (required?), protect against fork and _really_ pin the page
1012 * table entries. get_user_pages() will protect against swapping out the
1013 * pages but it will NOT protect against removing page table entries. This
1014 * can be achieved with
1015 * - using mlock / mmap(..., MAP_LOCKED, ...) from userland. This requires
1016 * an appropriate limit set up with setrlimit(..., RLIMIT_MEMLOCK, ...).
1017 * Usual Linux distributions support only a limited size of locked pages
1018 * (e.g. 32KB).
1019 * - setting the PageReserved bit (as we do in rtR0MemObjLinuxAllocPages())
1020 * or by
1021 * - setting the VM_LOCKED flag. This is the same as doing mlock() without
1022 * a range check.
1023 */
1024 /** @todo The Linux fork() protection will require more work if this API
1025 * is to be used for anything but locking VM pages. */
1026 while (rc-- > 0)
1027 {
1028 flush_dcache_page(pMemLnx->apPages[rc]);
1029 papVMAs[rc]->vm_flags |= (VM_DONTCOPY | VM_LOCKED);
1030 }
1031
1032 up_read(&pTask->mm->mmap_sem);
1033
1034 RTMemFree(papVMAs);
1035
1036 pMemLnx->Core.u.Lock.R0Process = R0Process;
1037 pMemLnx->cPages = cPages;
1038 Assert(!pMemLnx->fMappedToRing0);
1039 *ppMem = &pMemLnx->Core;
1040
1041 return VINF_SUCCESS;
1042 }
1043
1044 /*
1045 * Failed - we need to unlock any pages that we succeeded to lock.
1046 */
1047 while (rc-- > 0)
1048 {
1049 if (!PageReserved(pMemLnx->apPages[rc]))
1050 SetPageDirty(pMemLnx->apPages[rc]);
1051 page_cache_release(pMemLnx->apPages[rc]);
1052 }
1053
1054 up_read(&pTask->mm->mmap_sem);
1055
1056 RTMemFree(papVMAs);
1057 rc = VERR_LOCK_FAILED;
1058 }
1059
1060 rtR0MemObjDelete(&pMemLnx->Core);
1061 return rc;
1062}
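/*
 * Note that get_user_pages() may pin fewer pages than requested; the failure path
 * above therefore releases exactly the rc pages that were pinned, marking the
 * non-reserved ones dirty first, before returning VERR_LOCK_FAILED.
 */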
1063
1064
1065DECLHIDDEN(int) rtR0MemObjNativeLockKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pv, size_t cb, uint32_t fAccess)
1066{
1067 void *pvLast = (uint8_t *)pv + cb - 1;
1068 size_t const cPages = cb >> PAGE_SHIFT;
1069 PRTR0MEMOBJLNX pMemLnx;
1070 bool fLinearMapping;
1071 int rc;
1072 uint8_t *pbPage;
1073 size_t iPage;
1074 NOREF(fAccess);
1075
1076 if ( !RTR0MemKernelIsValidAddr(pv)
1077 || !RTR0MemKernelIsValidAddr(pv + cb))
1078 return VERR_INVALID_PARAMETER;
1079
1080 /*
1081 * The lower part of the kernel memory has a linear mapping between
1082 * physical and virtual addresses. So we take a short cut here. This is
1083 * assumed to be the cleanest way to handle those addresses (and the code
1084 * is well tested, though the test for determining it is not very nice).
1085 * If we ever decide it isn't we can still remove it.
1086 */
1087#if 0
1088 fLinearMapping = (unsigned long)pvLast < VMALLOC_START;
1089#else
1090 fLinearMapping = (unsigned long)pv >= (unsigned long)__va(0)
1091 && (unsigned long)pvLast < (unsigned long)high_memory;
1092#endif
1093
1094 /*
1095 * Allocate the memory object.
1096 */
1097 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), RTR0MEMOBJTYPE_LOCK, pv, cb);
1098 if (!pMemLnx)
1099 return VERR_NO_MEMORY;
1100
1101 /*
1102 * Gather the pages.
1103 * We ASSUME all kernel pages are non-swappable and non-movable.
1104 */
1105 rc = VINF_SUCCESS;
1106 pbPage = (uint8_t *)pvLast;
1107 iPage = cPages;
1108 if (!fLinearMapping)
1109 {
1110 while (iPage-- > 0)
1111 {
1112 struct page *pPage = rtR0MemObjLinuxVirtToPage(pbPage);
1113 if (RT_UNLIKELY(!pPage))
1114 {
1115 rc = VERR_LOCK_FAILED;
1116 break;
1117 }
1118 pMemLnx->apPages[iPage] = pPage;
1119 pbPage -= PAGE_SIZE;
1120 }
1121 }
1122 else
1123 {
1124 while (iPage-- > 0)
1125 {
1126 pMemLnx->apPages[iPage] = virt_to_page(pbPage);
1127 pbPage -= PAGE_SIZE;
1128 }
1129 }
1130 if (RT_SUCCESS(rc))
1131 {
1132 /*
1133 * Complete the memory object and return.
1134 */
1135 pMemLnx->Core.u.Lock.R0Process = NIL_RTR0PROCESS;
1136 pMemLnx->cPages = cPages;
1137 Assert(!pMemLnx->fMappedToRing0);
1138 *ppMem = &pMemLnx->Core;
1139
1140 return VINF_SUCCESS;
1141 }
1142
1143 rtR0MemObjDelete(&pMemLnx->Core);
1144 return rc;
1145}
1146
1147
1148DECLHIDDEN(int) rtR0MemObjNativeReserveKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pvFixed, size_t cb, size_t uAlignment)
1149{
1150#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1151 const size_t cPages = cb >> PAGE_SHIFT;
1152 struct page *pDummyPage;
1153 struct page **papPages;
1154
1155 /* check for unsupported stuff. */
1156 AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
1157 if (uAlignment > PAGE_SIZE)
1158 return VERR_NOT_SUPPORTED;
1159
1160 /*
1161 * Allocate a dummy page and create a page pointer array for vmap such that
1162 * the dummy page is mapped all over the reserved area.
1163 */
1164 pDummyPage = alloc_page(GFP_HIGHUSER);
1165 if (!pDummyPage)
1166 return VERR_NO_MEMORY;
1167 papPages = RTMemAlloc(sizeof(*papPages) * cPages);
1168 if (papPages)
1169 {
1170 void *pv;
1171 size_t iPage = cPages;
1172 while (iPage-- > 0)
1173 papPages[iPage] = pDummyPage;
1174# ifdef VM_MAP
1175 pv = vmap(papPages, cPages, VM_MAP, PAGE_KERNEL_RO);
1176# else
1177 pv = vmap(papPages, cPages, VM_ALLOC, PAGE_KERNEL_RO);
1178# endif
1179 RTMemFree(papPages);
1180 if (pv)
1181 {
1182 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
1183 if (pMemLnx)
1184 {
1185 pMemLnx->Core.u.ResVirt.R0Process = NIL_RTR0PROCESS;
1186 pMemLnx->cPages = 1;
1187 pMemLnx->apPages[0] = pDummyPage;
1188 *ppMem = &pMemLnx->Core;
1189 return VINF_SUCCESS;
1190 }
1191 vunmap(pv);
1192 }
1193 }
1194 __free_page(pDummyPage);
1195 return VERR_NO_MEMORY;
1196
1197#else /* < 2.4.22 */
1198 /*
1199 * Could probably use ioremap here, but the caller is in a better position than us
1200 * to select some safe physical memory.
1201 */
1202 return VERR_NOT_SUPPORTED;
1203#endif
1204}
1205
1206
1207DECLHIDDEN(int) rtR0MemObjNativeReserveUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, RTR0PROCESS R0Process)
1208{
1209 PRTR0MEMOBJLNX pMemLnx;
1210 void *pv;
1211 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
1212 if (!pTask)
1213 return VERR_NOT_SUPPORTED;
1214
1215 /*
1216 * Check that the specified alignment is supported.
1217 */
1218 if (uAlignment > PAGE_SIZE)
1219 return VERR_NOT_SUPPORTED;
1220
1221 /*
1222 * Let rtR0MemObjLinuxDoMmap do the difficult bits.
1223 */
1224 pv = rtR0MemObjLinuxDoMmap(R3PtrFixed, cb, uAlignment, pTask, RTMEM_PROT_NONE);
1225 if (pv == (void *)-1)
1226 return VERR_NO_MEMORY;
1227
1228 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
1229 if (!pMemLnx)
1230 {
1231 rtR0MemObjLinuxDoMunmap(pv, cb, pTask);
1232 return VERR_NO_MEMORY;
1233 }
1234
1235 pMemLnx->Core.u.ResVirt.R0Process = R0Process;
1236 *ppMem = &pMemLnx->Core;
1237 return VINF_SUCCESS;
1238}
1239
1240
1241DECLHIDDEN(int) rtR0MemObjNativeMapKernel(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap,
1242 void *pvFixed, size_t uAlignment,
1243 unsigned fProt, size_t offSub, size_t cbSub)
1244{
1245 int rc = VERR_NO_MEMORY;
1246 PRTR0MEMOBJLNX pMemLnxToMap = (PRTR0MEMOBJLNX)pMemToMap;
1247 PRTR0MEMOBJLNX pMemLnx;
1248
1249 /* Fail if requested to do something we can't. */
1250 AssertMsgReturn(!offSub && !cbSub, ("%#x %#x\n", offSub, cbSub), VERR_NOT_SUPPORTED);
1251 AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
1252 if (uAlignment > PAGE_SIZE)
1253 return VERR_NOT_SUPPORTED;
1254
1255 /*
1256 * Create the IPRT memory object.
1257 */
1258 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, pMemLnxToMap->Core.cb);
1259 if (pMemLnx)
1260 {
1261 if (pMemLnxToMap->cPages)
1262 {
1263#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1264 /*
1265 * Use vmap - 2.4.22 and later.
1266 */
1267 pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, true /* kernel */);
1268# ifdef VM_MAP
1269 pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[0], pMemLnxToMap->cPages, VM_MAP, fPg);
1270# else
1271 pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[0], pMemLnxToMap->cPages, VM_ALLOC, fPg);
1272# endif
1273 if (pMemLnx->Core.pv)
1274 {
1275 pMemLnx->fMappedToRing0 = true;
1276 rc = VINF_SUCCESS;
1277 }
1278 else
1279 rc = VERR_MAP_FAILED;
1280
1281#else /* < 2.4.22 */
1282 /*
1283 * Only option here is to share mappings if possible and forget about fProt.
1284 */
1285 if (rtR0MemObjIsRing3(pMemToMap))
1286 rc = VERR_NOT_SUPPORTED;
1287 else
1288 {
1289 rc = VINF_SUCCESS;
1290 if (!pMemLnxToMap->Core.pv)
1291 rc = rtR0MemObjLinuxVMap(pMemLnxToMap, !!(fProt & RTMEM_PROT_EXEC));
1292 if (RT_SUCCESS(rc))
1293 {
1294 Assert(pMemLnxToMap->Core.pv);
1295 pMemLnx->Core.pv = pMemLnxToMap->Core.pv;
1296 }
1297 }
1298#endif
1299 }
1300 else
1301 {
1302 /*
1303 * MMIO / physical memory.
1304 */
1305 Assert(pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_PHYS && !pMemLnxToMap->Core.u.Phys.fAllocated);
1306 pMemLnx->Core.pv = pMemLnxToMap->Core.u.Phys.uCachePolicy == RTMEM_CACHE_POLICY_MMIO
1307 ? ioremap_nocache(pMemLnxToMap->Core.u.Phys.PhysBase, pMemLnxToMap->Core.cb)
1308 : ioremap(pMemLnxToMap->Core.u.Phys.PhysBase, pMemLnxToMap->Core.cb);
1309 if (pMemLnx->Core.pv)
1310 {
1311 /** @todo fix protection. */
1312 rc = VINF_SUCCESS;
1313 }
1314 }
1315 if (RT_SUCCESS(rc))
1316 {
1317 pMemLnx->Core.u.Mapping.R0Process = NIL_RTR0PROCESS;
1318 *ppMem = &pMemLnx->Core;
1319 return VINF_SUCCESS;
1320 }
1321 rtR0MemObjDelete(&pMemLnx->Core);
1322 }
1323
1324 return rc;
1325}
1326
1327
1328#ifdef VBOX_USE_PAE_HACK
1329/**
1330 * Replace the PFN of a PTE with the address of the actual page.
1331 *
1332 * The caller maps a reserved dummy page at the address with the desired access
1333 * and flags.
1334 *
1335 * This hack is required for older Linux kernels which don't provide
1336 * remap_pfn_range().
1337 *
1338 * @returns 0 on success, -ENOMEM on failure.
1339 * @param mm The memory context.
1340 * @param ulAddr The mapping address.
1341 * @param Phys The physical address of the page to map.
1342 */
1343static int rtR0MemObjLinuxFixPte(struct mm_struct *mm, unsigned long ulAddr, RTHCPHYS Phys)
1344{
1345 int rc = -ENOMEM;
1346 pgd_t *pgd;
1347
1348 spin_lock(&mm->page_table_lock);
1349
1350 pgd = pgd_offset(mm, ulAddr);
1351 if (!pgd_none(*pgd) && !pgd_bad(*pgd))
1352 {
1353 pmd_t *pmd = pmd_offset(pgd, ulAddr);
1354 if (!pmd_none(*pmd))
1355 {
1356 pte_t *ptep = pte_offset_map(pmd, ulAddr);
1357 if (ptep)
1358 {
1359 pte_t pte = *ptep;
1360 pte.pte_high &= 0xfff00000;
1361 pte.pte_high |= ((Phys >> 32) & 0x000fffff);
1362 pte.pte_low &= 0x00000fff;
1363 pte.pte_low |= (Phys & 0xfffff000);
1364 set_pte(ptep, pte);
1365 pte_unmap(ptep);
1366 rc = 0;
1367 }
1368 }
1369 }
1370
1371 spin_unlock(&mm->page_table_lock);
1372 return rc;
1373}
1374#endif /* VBOX_USE_PAE_HACK */
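/*
 * The PTE patching above swaps only the page frame number of the 64-bit PAE entry:
 * pte_low keeps its low 12 flag bits and receives bits 12..31 of Phys, pte_high
 * keeps its top 12 bits (which include NX) and receives bits 32..51, so the access
 * rights set up for the dummy page remain untouched.
 */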
1375
1376
1377DECLHIDDEN(int) rtR0MemObjNativeMapUser(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, RTR3PTR R3PtrFixed,
1378 size_t uAlignment, unsigned fProt, RTR0PROCESS R0Process)
1379{
1380 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
1381 PRTR0MEMOBJLNX pMemLnxToMap = (PRTR0MEMOBJLNX)pMemToMap;
1382 int rc = VERR_NO_MEMORY;
1383 PRTR0MEMOBJLNX pMemLnx;
1384#ifdef VBOX_USE_PAE_HACK
1385 struct page *pDummyPage;
1386 RTHCPHYS DummyPhys;
1387#endif
1388
1389 /*
1390 * Check for restrictions.
1391 */
1392 if (!pTask)
1393 return VERR_NOT_SUPPORTED;
1394 if (uAlignment > PAGE_SIZE)
1395 return VERR_NOT_SUPPORTED;
1396
1397#ifdef VBOX_USE_PAE_HACK
1398 /*
1399 * Allocate a dummy page for use when mapping the memory.
1400 */
1401 pDummyPage = alloc_page(GFP_USER);
1402 if (!pDummyPage)
1403 return VERR_NO_MEMORY;
1404 SetPageReserved(pDummyPage);
1405 DummyPhys = page_to_phys(pDummyPage);
1406#endif
1407
1408 /*
1409 * Create the IPRT memory object.
1410 */
1411 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, pMemLnxToMap->Core.cb);
1412 if (pMemLnx)
1413 {
1414 /*
1415 * Allocate user space mapping.
1416 */
1417 void *pv;
1418 pv = rtR0MemObjLinuxDoMmap(R3PtrFixed, pMemLnxToMap->Core.cb, uAlignment, pTask, fProt);
1419 if (pv != (void *)-1)
1420 {
1421 /*
1422 * Map page by page into the mmap area.
1423 * This is generic, paranoid and not very efficient.
1424 */
1425 pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, false /* user */);
1426 unsigned long ulAddrCur = (unsigned long)pv;
1427 const size_t cPages = pMemLnxToMap->Core.cb >> PAGE_SHIFT;
1428 size_t iPage;
1429
1430 down_write(&pTask->mm->mmap_sem);
1431
1432 rc = VINF_SUCCESS;
1433 if (pMemLnxToMap->cPages)
1434 {
1435 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE)
1436 {
1437#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1438 RTHCPHYS Phys = page_to_phys(pMemLnxToMap->apPages[iPage]);
1439#endif
1440#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1441 struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
1442 AssertBreakStmt(vma, rc = VERR_INTERNAL_ERROR);
1443#endif
1444#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && defined(RT_ARCH_X86)
1445 /* remap_page_range() limitation on x86 */
1446 AssertBreakStmt(Phys < _4G, rc = VERR_NO_MEMORY);
1447#endif
1448
1449#if defined(VBOX_USE_INSERT_PAGE) && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
1450 rc = vm_insert_page(vma, ulAddrCur, pMemLnxToMap->apPages[iPage]);
1451 vma->vm_flags |= VM_RESERVED; /* This flag helps make 100% sure some bad stuff won't happen (swap, core, ++). */
1452#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1453 rc = remap_pfn_range(vma, ulAddrCur, page_to_pfn(pMemLnxToMap->apPages[iPage]), PAGE_SIZE, fPg);
1454#elif defined(VBOX_USE_PAE_HACK)
1455 rc = remap_page_range(vma, ulAddrCur, DummyPhys, PAGE_SIZE, fPg);
1456 if (!rc)
1457 rc = rtR0MemObjLinuxFixPte(pTask->mm, ulAddrCur, Phys);
1458#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1459 rc = remap_page_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1460#else /* 2.4 */
1461 rc = remap_page_range(ulAddrCur, Phys, PAGE_SIZE, fPg);
1462#endif
1463 if (rc)
1464 {
1465 rc = VERR_NO_MEMORY;
1466 break;
1467 }
1468 }
1469 }
1470 else
1471 {
1472 RTHCPHYS Phys;
1473 if (pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_PHYS)
1474 Phys = pMemLnxToMap->Core.u.Phys.PhysBase;
1475 else if (pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_CONT)
1476 Phys = pMemLnxToMap->Core.u.Cont.Phys;
1477 else
1478 {
1479 AssertMsgFailed(("%d\n", pMemLnxToMap->Core.enmType));
1480 Phys = NIL_RTHCPHYS;
1481 }
1482 if (Phys != NIL_RTHCPHYS)
1483 {
1484 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE, Phys += PAGE_SIZE)
1485 {
1486#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1487 struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
1488 AssertBreakStmt(vma, rc = VERR_INTERNAL_ERROR);
1489#endif
1490#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && defined(RT_ARCH_X86)
1491 /* remap_page_range() limitation on x86 */
1492 AssertBreakStmt(Phys < _4G, rc = VERR_NO_MEMORY);
1493#endif
1494
1495#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1496 rc = remap_pfn_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1497#elif defined(VBOX_USE_PAE_HACK)
1498 rc = remap_page_range(vma, ulAddrCur, DummyPhys, PAGE_SIZE, fPg);
1499 if (!rc)
1500 rc = rtR0MemObjLinuxFixPte(pTask->mm, ulAddrCur, Phys);
1501#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1502 rc = remap_page_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1503#else /* 2.4 */
1504 rc = remap_page_range(ulAddrCur, Phys, PAGE_SIZE, fPg);
1505#endif
1506 if (rc)
1507 {
1508 rc = VERR_NO_MEMORY;
1509 break;
1510 }
1511 }
1512 }
1513 }
1514
1515 up_write(&pTask->mm->mmap_sem);
1516
1517 if (RT_SUCCESS(rc))
1518 {
1519#ifdef VBOX_USE_PAE_HACK
1520 __free_page(pDummyPage);
1521#endif
1522 pMemLnx->Core.pv = pv;
1523 pMemLnx->Core.u.Mapping.R0Process = R0Process;
1524 *ppMem = &pMemLnx->Core;
1525 return VINF_SUCCESS;
1526 }
1527
1528 /*
1529 * Bail out.
1530 */
1531 rtR0MemObjLinuxDoMunmap(pv, pMemLnxToMap->Core.cb, pTask);
1532 }
1533 rtR0MemObjDelete(&pMemLnx->Core);
1534 }
1535#ifdef VBOX_USE_PAE_HACK
1536 __free_page(pDummyPage);
1537#endif
1538
1539 return rc;
1540}
1541
1542
1543DECLHIDDEN(int) rtR0MemObjNativeProtect(PRTR0MEMOBJINTERNAL pMem, size_t offSub, size_t cbSub, uint32_t fProt)
1544{
1545 NOREF(pMem);
1546 NOREF(offSub);
1547 NOREF(cbSub);
1548 NOREF(fProt);
1549 return VERR_NOT_SUPPORTED;
1550}
1551
1552
1553DECLHIDDEN(RTHCPHYS) rtR0MemObjNativeGetPagePhysAddr(PRTR0MEMOBJINTERNAL pMem, size_t iPage)
1554{
1555 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
1556
1557 if (pMemLnx->cPages)
1558 return page_to_phys(pMemLnx->apPages[iPage]);
1559
1560 switch (pMemLnx->Core.enmType)
1561 {
1562 case RTR0MEMOBJTYPE_CONT:
1563 return pMemLnx->Core.u.Cont.Phys + (iPage << PAGE_SHIFT);
1564
1565 case RTR0MEMOBJTYPE_PHYS:
1566 return pMemLnx->Core.u.Phys.PhysBase + (iPage << PAGE_SHIFT);
1567
1568 /* the parent knows */
1569 case RTR0MEMOBJTYPE_MAPPING:
1570 return rtR0MemObjNativeGetPagePhysAddr(pMemLnx->Core.uRel.Child.pParent, iPage);
1571
1572 /* cPages > 0 */
1573 case RTR0MEMOBJTYPE_LOW:
1574 case RTR0MEMOBJTYPE_LOCK:
1575 case RTR0MEMOBJTYPE_PHYS_NC:
1576 case RTR0MEMOBJTYPE_PAGE:
1577 default:
1578 AssertMsgFailed(("%d\n", pMemLnx->Core.enmType));
1579 /* fall thru */
1580
1581 case RTR0MEMOBJTYPE_RES_VIRT:
1582 return NIL_RTHCPHYS;
1583 }
1584}
1585
