VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GMMR0.cpp @ 103131

Last change on this file since 103131 was 98103, checked in by vboxsync, 21 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 203.9 KB
1/* $Id: GMMR0.cpp 98103 2023-01-17 14:15:46Z vboxsync $ */
2/** @file
3 * GMM - Global Memory Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/** @page pg_gmm GMM - The Global Memory Manager
30 *
31 * As the name indicates, this component is responsible for global memory
32 * management. Currently only guest RAM is allocated from the GMM, but this
33 * may change to include shadow page tables and other bits later.
34 *
35 * Guest RAM is managed as individual pages, but allocated from the host OS
36 * in chunks for reasons of portability / efficiency. To minimize the memory
37 * footprint all tracking structures must be as small as possible without
38 * unnecessary performance penalties.
39 *
40 * The allocation chunks have a fixed size, defined at compile time
41 * by the #GMM_CHUNK_SIZE \#define.
42 *
43 * Each chunk is given a unique ID. Each page also has a unique ID. The
44 * relationship between the two IDs is:
45 * @code
46 * GMM_CHUNK_SHIFT = log2(GMM_CHUNK_SIZE / GUEST_PAGE_SIZE);
47 * idPage = (idChunk << GMM_CHUNK_SHIFT) | iPage;
48 * @endcode
49 * Where iPage is the index of the page within the chunk. This ID scheme
50 * permits efficient chunk and page lookup, but it relies on the chunk size
51 * being set at compile time. The chunks are organized in an AVL tree with their
52 * IDs being the keys.
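 *
 * For illustration only (a sketch, not code taken from this file), the
 * reverse mapping from a page ID back to its chunk and page index is:
 * @code
 *      idChunk = idPage >> GMM_CHUNK_SHIFT;
 *      iPage   = idPage & ((1 << GMM_CHUNK_SHIFT) - 1);
 * @endcode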
53 *
54 * The physical address of each page in an allocation chunk is maintained by
55 * the #RTR0MEMOBJ and obtained using #RTR0MemObjGetPagePhysAddr. There is no
56 * need to duplicate this information (doing so would cost 8 bytes per page).
57 *
58 * So what do we need to track per page? Most importantly we need to know
59 * which state the page is in:
60 * - Private - Allocated for (eventually) backing one particular VM page.
61 * - Shared - Readonly page that is used by one or more VMs and treated
62 * as COW by PGM.
63 * - Free - Not used by anyone.
64 *
65 * For the page replacement operations (sharing, defragmenting and freeing)
66 * to be somewhat efficient, private pages need to be associated with a
67 * particular page in a particular VM.
68 *
69 * Tracking the usage of shared pages is impractical and expensive, so we'll
70 * settle for a reference counting system instead.
71 *
72 * Free pages will be chained on LIFOs.
73 *
74 * On 64-bit systems we will use a 64-bit bitfield per page, while on 32-bit
75 * systems a 32-bit bitfield will have to suffice because of address space
76 * limitations. The #GMMPAGE structure shows the details.
77 *
78 *
79 * @section sec_gmm_alloc_strat Page Allocation Strategy
80 *
81 * The strategy for allocating pages has to take fragmentation and shared
82 * pages into account, or we may end up with 2000 chunks with only
83 * a few pages in each. Shared pages cannot easily be reallocated because
84 * of the inaccurate usage accounting (see above). Private pages can be
85 * reallocated by a defragmentation thread in the same manner that sharing
86 * is done.
87 *
88 * The first approach is to manage the free pages in two sets depending on
89 * whether they are mainly for the allocation of shared or private pages.
90 * In the initial implementation there will be almost no possibility for
91 * mixing shared and private pages in the same chunk (only if we're really
92 * stressed on memory), but when we implement forking of VMs and have to
93 * deal with lots of COW pages it'll start getting kind of interesting.
94 *
95 * The sets are lists of chunks with approximately the same number of
96 * free pages. Say the chunk size is 1MB, meaning 256 pages, and a set
97 * consists of 16 lists. So, the first list will contain the chunks with
98 * 1-7 free pages, the second covers 8-15, and so on. The chunks will be
99 * moved between the lists as pages are freed up or allocated.
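 *
 * A sketch of what that list selection amounts to (purely illustrative; the
 * actual bucketing in the code differs in detail, and cLists / cPagesPerChunk
 * / cFreePagesInChunk are placeholders here):
 * @code
 *      iList = cFreePagesInChunk * cLists / (cPagesPerChunk + 1);
 * @endcode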
100 *
101 *
102 * @section sec_gmm_costs Costs
103 *
104 * The per page cost in kernel space is one #GMMPAGE plus whatever the
105 * RTR0MEMOBJ entails. In addition there is the chunk cost of approximately
106 * (sizeof(RTR0MEMOBJ) + sizeof(GMMCHUNK)) / 2^GMM_CHUNK_SHIFT bytes per page.
107 *
108 * On Windows the per page #RTR0MEMOBJ cost is 32 bits on 32-bit Windows
109 * and 64 bits on 64-bit Windows (a PFN_NUMBER in the MDL). So, 64 bits per page.
110 * The cost on Linux is identical, but here it's because of sizeof(struct page *).
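 *
 * As a rough worked example (illustrative figures only, assuming 4KB guest
 * pages and the 64-bit only GMMPAGE): 8 bytes of GMMPAGE plus 8 bytes of PFN
 * bookkeeping in the RTR0MEMOBJ comes to roughly 16 bytes of ring-0 tracking
 * per guest page, i.e. about 0.4% of the guest RAM size.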
111 *
112 *
113 * @section sec_gmm_legacy Legacy Mode for Non-Tier-1 Platforms
114 *
115 * In legacy mode the page source is locked user pages rather than
116 * #RTR0MemObjAllocPhysNC, which means that a page can only be allocated
117 * by the VM that locked it. We will make no attempt at implementing
118 * page sharing on these systems, just do enough to make it all work.
119 *
120 * @note With 6.1 dropping 32-bit support, the legacy mode is obsolete
121 * under the assumption that there is sufficient kernel virtual address
122 * space to map all of the guest memory allocations. So, we'll be using
123 * #RTR0MemObjAllocPage on some platforms as an alternative to
124 * #RTR0MemObjAllocPhysNC.
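 *
 * For illustration (a sketch of the choice just described, not the actual
 * allocation path in this file):
 * @code
 *      if (pGMM->fHasWorkingAllocPhysNC)
 *          rc = RTR0MemObjAllocPhysNC(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
 *      else
 *          rc = RTR0MemObjAllocPage(&hMemObj, GMM_CHUNK_SIZE, false); // fExecutable=false
 * @endcode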
125 *
126 *
127 * @subsection sub_gmm_locking Serializing
128 *
129 * One simple fast mutex will be employed in the initial implementation, not
130 * two as mentioned in @ref sec_pgmPhys_Serializing.
131 *
132 * @see @ref sec_pgmPhys_Serializing
133 *
134 *
135 * @section sec_gmm_overcommit Memory Over-Commitment Management
136 *
137 * The GVM will have to do the system wide memory over-commitment
138 * management. My current ideas are:
139 * - Per-VM over-commitment policy that indicates how much to initially commit
140 * to it and what to do in an out-of-memory situation.
141 * - Prevent overtaxing the host.
142 *
143 * There are some challenges here; the main ones are configurability and
144 * security. Should we, for instance, permit anyone to request 100% memory
145 * commitment? Who should be allowed to make runtime adjustments of the
146 * config, and how do we prevent these settings from being lost when the last
147 * VM process exits? The solution is probably to have an optional root
148 * daemon that will keep VMMR0.r0 in memory and enable the security measures.
149 *
150 *
151 *
152 * @section sec_gmm_numa NUMA
153 *
154 * NUMA considerations will be designed and implemented a bit later.
155 *
156 * The preliminary guess is that we will have to try to allocate memory as
157 * close as possible to the CPUs the VM is executed on (EMT and additional CPU
158 * threads), which means it's mostly about allocation and sharing policies.
159 * Both the scheduler and the allocator interface will have to supply some NUMA
160 * info, and we'll need a way to calculate access costs.
161 *
162 */
163
164
165/*********************************************************************************************************************************
166* Header Files *
167*********************************************************************************************************************************/
168#define LOG_GROUP LOG_GROUP_GMM
169#include <VBox/rawpci.h>
170#include <VBox/vmm/gmm.h>
171#include "GMMR0Internal.h"
172#include <VBox/vmm/vmcc.h>
173#include <VBox/vmm/pgm.h>
174#include <VBox/log.h>
175#include <VBox/param.h>
176#include <VBox/err.h>
177#include <VBox/VMMDev.h>
178#include <iprt/asm.h>
179#include <iprt/avl.h>
180#ifdef VBOX_STRICT
181# include <iprt/crc.h>
182#endif
183#include <iprt/critsect.h>
184#include <iprt/list.h>
185#include <iprt/mem.h>
186#include <iprt/memobj.h>
187#include <iprt/mp.h>
188#include <iprt/semaphore.h>
189#include <iprt/spinlock.h>
190#include <iprt/string.h>
191#include <iprt/time.h>
192
193/* This is 64-bit only code now. */
194#if HC_ARCH_BITS != 64 || ARCH_BITS != 64
195# error "This is 64-bit only code"
196#endif
197
198
199/*********************************************************************************************************************************
200* Defined Constants And Macros *
201*********************************************************************************************************************************/
202/** @def VBOX_USE_CRIT_SECT_FOR_GIANT
203 * Use a critical section instead of a fast mutex for the giant GMM lock.
204 *
205 * @remarks This is primarily a way of avoiding the deadlock checks in the
206 * Windows driver verifier. */
207#if defined(RT_OS_WINDOWS) || defined(RT_OS_DARWIN) || defined(DOXYGEN_RUNNING)
208# define VBOX_USE_CRIT_SECT_FOR_GIANT
209#endif
210
211
212/*********************************************************************************************************************************
213* Structures and Typedefs *
214*********************************************************************************************************************************/
215/** Pointer to set of free chunks. */
216typedef struct GMMCHUNKFREESET *PGMMCHUNKFREESET;
217
218/**
219 * The per-page tracking structure employed by the GMM.
220 *
221 * Because of the different layout on 32-bit and 64-bit hosts in earlier
222 * versions of the code, macros are used to get and set some of the data.
223 */
224typedef union GMMPAGE
225{
226 /** Unsigned integer view. */
227 uint64_t u;
228
229 /** The common view. */
230 struct GMMPAGECOMMON
231 {
232 uint32_t uStuff1 : 32;
233 uint32_t uStuff2 : 30;
234 /** The page state. */
235 uint32_t u2State : 2;
236 } Common;
237
238 /** The view of a private page. */
239 struct GMMPAGEPRIVATE
240 {
241 /** The guest page frame number. (Max addressable: 2 ^ 44 - 16) */
242 uint32_t pfn;
243 /** The GVM handle. (64K VMs) */
244 uint32_t hGVM : 16;
245 /** Reserved. */
246 uint32_t u16Reserved : 14;
247 /** The page state. */
248 uint32_t u2State : 2;
249 } Private;
250
251 /** The view of a shared page. */
252 struct GMMPAGESHARED
253 {
254 /** The host page frame number. (Max addressable: 2 ^ 44 - 16) */
255 uint32_t pfn;
256 /** The reference count (64K VMs). */
257 uint32_t cRefs : 16;
258 /** Used for debug checksumming. */
259 uint32_t u14Checksum : 14;
260 /** The page state. */
261 uint32_t u2State : 2;
262 } Shared;
263
264 /** The view of a free page. */
265 struct GMMPAGEFREE
266 {
267 /** The index of the next page in the free list. UINT16_MAX is NIL. */
268 uint16_t iNext;
269 /** Reserved. Checksum or something? */
270 uint16_t u16Reserved0;
271 /** Reserved. Checksum or something? */
272 uint32_t u30Reserved1 : 29;
273 /** Set if the page was zeroed. */
274 uint32_t fZeroed : 1;
275 /** The page state. */
276 uint32_t u2State : 2;
277 } Free;
278} GMMPAGE;
279AssertCompileSize(GMMPAGE, sizeof(RTHCUINTPTR));
280/** Pointer to a GMMPAGE. */
281typedef GMMPAGE *PGMMPAGE;
282
283
284/** @name The Page States.
285 * @{ */
286/** A private page. */
287#define GMM_PAGE_STATE_PRIVATE 0
288/** A shared page. */
289#define GMM_PAGE_STATE_SHARED 2
290/** A free page. */
291#define GMM_PAGE_STATE_FREE 3
292/** @} */
293
294
295/** @def GMM_PAGE_IS_PRIVATE
296 *
297 * @returns true if private, false if not.
298 * @param pPage The GMM page.
299 */
300#define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_PRIVATE )
301
302/** @def GMM_PAGE_IS_SHARED
303 *
304 * @returns true if shared, false if not.
305 * @param pPage The GMM page.
306 */
307#define GMM_PAGE_IS_SHARED(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_SHARED )
308
309/** @def GMM_PAGE_IS_FREE
310 *
311 * @returns true if free, false if not.
312 * @param pPage The GMM page.
313 */
314#define GMM_PAGE_IS_FREE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_FREE )
315
316/** @def GMM_PAGE_PFN_LAST
317 * The last valid guest pfn range.
318 * @remark Some of the values outside the range have special meaning,
319 * see GMM_PAGE_PFN_UNSHAREABLE.
320 */
321#define GMM_PAGE_PFN_LAST UINT32_C(0xfffffff0)
322AssertCompile(GMM_PAGE_PFN_LAST == (GMM_GCPHYS_LAST >> GUEST_PAGE_SHIFT));
323
324/** @def GMM_PAGE_PFN_UNSHAREABLE
325 * Indicates that this page isn't used for normal guest memory and thus isn't shareable.
326 */
327#define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0xfffffff1)
328AssertCompile(GMM_PAGE_PFN_UNSHAREABLE == (GMM_GCPHYS_UNSHAREABLE >> GUEST_PAGE_SHIFT));
329
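/*
 * Illustrative only (not part of the original file): how the page state
 * macros above are typically used when scanning a chunk's page array, in
 * the style of gmmR0CleanupVMScanChunk further down.
 *
 *      for (unsigned iPage = 0; iPage < GMM_CHUNK_NUM_PAGES; iPage++)
 *          if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
 *              cPrivate++;
 *          else if (GMM_PAGE_IS_FREE(&pChunk->aPages[iPage]))
 *              cFree++;
 *          else
 *              cShared++;
 */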
330
331/**
332 * A GMM allocation chunk ring-3 mapping record.
333 *
334 * This should really be associated with a session and not a VM, but
335 * it's simpler to associate it with a VM and clean up when the VM object
336 * is destroyed.
337 */
338typedef struct GMMCHUNKMAP
339{
340 /** The mapping object. */
341 RTR0MEMOBJ hMapObj;
342 /** The VM owning the mapping. */
343 PGVM pGVM;
344} GMMCHUNKMAP;
345/** Pointer to a GMM allocation chunk mapping. */
346typedef struct GMMCHUNKMAP *PGMMCHUNKMAP;
347
348
349/**
350 * A GMM allocation chunk.
351 */
352typedef struct GMMCHUNK
353{
354 /** The AVL node core.
355 * The Key is the chunk ID. (Giant mtx.) */
356 AVLU32NODECORE Core;
357 /** The memory object.
358 * Either from RTR0MemObjAllocPhysNC or RTR0MemObjLockUser depending on
359 * what the host can dish up with. (Chunk mtx protects mapping accesses
360 * and related frees.) */
361 RTR0MEMOBJ hMemObj;
362#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
363 /** Pointer to the kernel mapping. */
364 uint8_t *pbMapping;
365#endif
366 /** Pointer to the next chunk in the free list. (Giant mtx.) */
367 PGMMCHUNK pFreeNext;
368 /** Pointer to the previous chunk in the free list. (Giant mtx.) */
369 PGMMCHUNK pFreePrev;
370 /** Pointer to the free set this chunk belongs to. NULL for
371 * chunks with no free pages. (Giant mtx.) */
372 PGMMCHUNKFREESET pSet;
373 /** List node in the chunk list (GMM::ChunkList). (Giant mtx.) */
374 RTLISTNODE ListNode;
375 /** Pointer to an array of mappings. (Chunk mtx.) */
376 PGMMCHUNKMAP paMappingsX;
377 /** The number of mappings. (Chunk mtx.) */
378 uint16_t cMappingsX;
379 * The mapping lock this chunk is using. UINT8_MAX if nobody is mapping
380 * or freeing anything. (Giant mtx.) */
381 uint8_t volatile iChunkMtx;
382 /** GMM_CHUNK_FLAGS_XXX. (Giant mtx.) */
383 uint8_t fFlags;
384 /** The head of the list of free pages. UINT16_MAX is the NIL value.
385 * (Giant mtx.) */
386 uint16_t iFreeHead;
387 /** The number of free pages. (Giant mtx.) */
388 uint16_t cFree;
389 /** The GVM handle of the VM that first allocated pages from this chunk; this
390 * is used as a preference when there are several chunks to choose from.
391 * When in bound memory mode this isn't a preference any longer. (Giant
392 * mtx.) */
393 uint16_t hGVM;
394 /** The ID of the NUMA node the memory mostly resides on. (Reserved for
395 * future use.) (Giant mtx.) */
396 uint16_t idNumaNode;
397 /** The number of private pages. (Giant mtx.) */
398 uint16_t cPrivate;
399 /** The number of shared pages. (Giant mtx.) */
400 uint16_t cShared;
401 /** The UID this chunk is associated with. */
402 RTUID uidOwner;
403 uint32_t u32Padding;
404 /** The pages. (Giant mtx.) */
405 GMMPAGE aPages[GMM_CHUNK_NUM_PAGES];
406} GMMCHUNK;
407
408/** Indicates that the NUMA properties of the memory are unknown. */
409#define GMM_CHUNK_NUMA_ID_UNKNOWN UINT16_C(0xfffe)
410
411/** @name GMM_CHUNK_FLAGS_XXX - chunk flags.
412 * @{ */
413/** Indicates that the chunk is a large page (2MB). */
414#define GMM_CHUNK_FLAGS_LARGE_PAGE UINT16_C(0x0001)
415/** @} */
416
417
418/**
419 * An allocation chunk TLB entry.
420 */
421typedef struct GMMCHUNKTLBE
422{
423 /** The chunk id. */
424 uint32_t idChunk;
425 /** Pointer to the chunk. */
426 PGMMCHUNK pChunk;
427} GMMCHUNKTLBE;
428/** Pointer to an allocation chunk TLB entry. */
429typedef GMMCHUNKTLBE *PGMMCHUNKTLBE;
430
431
432/** The number of entries in the allocation chunk TLB. */
433#define GMM_CHUNKTLB_ENTRIES 32
434/** Gets the TLB entry index for the given Chunk ID. */
435#define GMM_CHUNKTLB_IDX(idChunk) ( (idChunk) & (GMM_CHUNKTLB_ENTRIES - 1) )
436
437/**
438 * An allocation chunk TLB.
439 */
440typedef struct GMMCHUNKTLB
441{
442 /** The TLB entries. */
443 GMMCHUNKTLBE aEntries[GMM_CHUNKTLB_ENTRIES];
444} GMMCHUNKTLB;
445/** Pointer to an allocation chunk TLB. */
446typedef GMMCHUNKTLB *PGMMCHUNKTLB;
447
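/*
 * Illustrative sketch only (not part of the original file): the typical
 * pattern for resolving a chunk ID via the TLB above, falling back to the
 * AVL tree on a miss (assumes the caller holds the tree spinlock or the
 * giant mutex).
 *
 *      PGMMCHUNKTLBE pTlbe  = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(idChunk)];
 *      PGMMCHUNK     pChunk = pTlbe->pChunk;
 *      if (!pChunk || pTlbe->idChunk != idChunk)
 *      {
 *          pChunk = (PGMMCHUNK)RTAvlU32Get(&pGMM->pChunks, idChunk);
 *          pTlbe->idChunk = idChunk;
 *          pTlbe->pChunk  = pChunk;
 *      }
 */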
448
449/**
450 * The GMM instance data.
451 */
452typedef struct GMM
453{
454 /** Magic / eye catcher. GMM_MAGIC */
455 uint32_t u32Magic;
456 /** The number of threads waiting on the mutex. */
457 uint32_t cMtxContenders;
458#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
459 /** The critical section protecting the GMM.
460 * More fine grained locking can be implemented later if necessary. */
461 RTCRITSECT GiantCritSect;
462#else
463 /** The fast mutex protecting the GMM.
464 * More fine grained locking can be implemented later if necessary. */
465 RTSEMFASTMUTEX hMtx;
466#endif
467#ifdef VBOX_STRICT
468 /** The current mutex owner. */
469 RTNATIVETHREAD hMtxOwner;
470#endif
471 /** Spinlock protecting the AVL tree.
472 * @todo Make this a read-write spinlock as we should allow concurrent
473 * lookups. */
474 RTSPINLOCK hSpinLockTree;
475 /** The chunk tree.
476 * Protected by hSpinLockTree. */
477 PAVLU32NODECORE pChunks;
478 /** Chunk freeing generation - incremented whenever a chunk is freed. Used
479 * for validating the per-VM chunk TLB entries. Valid range is 1 to 2^62
480 * (exclusive), though higher numbers may temporarily occur while
481 * invalidating the individual TLBs during wrap-around processing. */
482 uint64_t volatile idFreeGeneration;
483 /** The chunk TLB.
484 * Protected by hSpinLockTree. */
485 GMMCHUNKTLB ChunkTLB;
486 /** The private free set. */
487 GMMCHUNKFREESET PrivateX;
488 /** The shared free set. */
489 GMMCHUNKFREESET Shared;
490
491 /** Shared module tree (global).
492 * @todo separate trees for distinctly different guest OSes. */
493 PAVLLU32NODECORE pGlobalSharedModuleTree;
494 /** Sharable modules (count of nodes in pGlobalSharedModuleTree). */
495 uint32_t cShareableModules;
496
497 /** The chunk list. For simplifying the cleanup process and avoiding tree
498 * traversal. */
499 RTLISTANCHOR ChunkList;
500
501 /** The maximum number of pages we're allowed to allocate.
502 * @gcfgm{GMM/MaxPages,64-bit, Direct.}
503 * @gcfgm{GMM/PctPages,32-bit, Relative to the number of host pages.} */
504 uint64_t cMaxPages;
505 /** The number of pages that have been reserved.
506 * The invariant is that cReservedPages - cOverCommittedPages <= cMaxPages. */
507 uint64_t cReservedPages;
508 /** The number of pages that we have over-committed in reservations. */
509 uint64_t cOverCommittedPages;
510 /** The number of actually allocated (committed if you like) pages. */
511 uint64_t cAllocatedPages;
512 /** The number of pages that are shared. A subset of cAllocatedPages. */
513 uint64_t cSharedPages;
514 /** The number of pages that are actually shared between VMs. */
515 uint64_t cDuplicatePages;
516 /** The number of shared pages that have been left behind by
517 * VMs not doing proper cleanup. */
518 uint64_t cLeftBehindSharedPages;
519 /** The number of allocation chunks.
520 * (The number of pages we've allocated from the host can be derived from this.) */
521 uint32_t cChunks;
522 /** The number of current ballooned pages. */
523 uint64_t cBalloonedPages;
524
525#ifdef VBOX_WITH_LINEAR_HOST_PHYS_MEM
526 /** Whether #RTR0MemObjAllocPhysNC works. */
527 bool fHasWorkingAllocPhysNC;
528#else
529 bool fPadding;
530#endif
531 /** The bound memory mode indicator.
532 * When set, the memory will be bound to a specific VM and never
533 * shared. This is always set if fLegacyAllocationMode is set.
534 * (Also determined at initialization time.) */
535 bool fBoundMemoryMode;
536 /** The number of registered VMs. */
537 uint16_t cRegisteredVMs;
538
539 /** The index of the next mutex to use. */
540 uint32_t iNextChunkMtx;
541 /** Chunk locks for reducing lock contention without having to allocate
542 * one lock per chunk. */
543 struct
544 {
545 /** The mutex */
546 RTSEMFASTMUTEX hMtx;
547 /** The number of threads currently using this mutex. */
548 uint32_t volatile cUsers;
549 } aChunkMtx[64];
550
551 /** The number of freed chunks ever. This is used as list generation to
552 * avoid restarting the cleanup scanning when the list wasn't modified. */
553 uint32_t volatile cFreedChunks;
554 /** The previously allocated chunk ID.
555 * Used as a hint to avoid scanning the whole bitmap. */
556 uint32_t idChunkPrev;
557 /** Spinlock protecting idChunkPrev & bmChunkId. */
558 RTSPINLOCK hSpinLockChunkId;
559 /** Chunk ID allocation bitmap.
560 * Bits of allocated IDs are set, free ones are clear.
561 * The NIL id (0) is marked allocated. */
562 uint32_t bmChunkId[(GMM_CHUNKID_LAST + 1 + 31) / 32];
563} GMM;
564/** Pointer to the GMM instance. */
565typedef GMM *PGMM;
566
567/** The value of GMM::u32Magic (Katsuhiro Otomo). */
568#define GMM_MAGIC UINT32_C(0x19540414)
569
570
571/**
572 * GMM chunk mutex state.
573 *
574 * This is returned by gmmR0ChunkMutexAcquire and is used by the other
575 * gmmR0ChunkMutex* methods.
576 */
577typedef struct GMMR0CHUNKMTXSTATE
578{
579 PGMM pGMM;
580 /** The index of the chunk mutex. */
581 uint8_t iChunkMtx;
582 /** The relevant flags (GMMR0CHUNK_MTX_XXX). */
583 uint8_t fFlags;
584} GMMR0CHUNKMTXSTATE;
585/** Pointer to a chunk mutex state. */
586typedef GMMR0CHUNKMTXSTATE *PGMMR0CHUNKMTXSTATE;
587
588/** @name GMMR0CHUNK_MTX_XXX
589 * @{ */
590#define GMMR0CHUNK_MTX_INVALID UINT32_C(0)
591#define GMMR0CHUNK_MTX_KEEP_GIANT UINT32_C(1)
592#define GMMR0CHUNK_MTX_RETAKE_GIANT UINT32_C(2)
593#define GMMR0CHUNK_MTX_DROP_GIANT UINT32_C(3)
594#define GMMR0CHUNK_MTX_END UINT32_C(4)
595/** @} */
596
597
598/** The maximum number of shared modules per-vm. */
599#define GMM_MAX_SHARED_PER_VM_MODULES 2048
600/** The maximum number of shared modules GMM is allowed to track. */
601#define GMM_MAX_SHARED_GLOBAL_MODULES 16834
602
603
604/**
605 * Argument packet for gmmR0SharedModuleCleanup.
606 */
607typedef struct GMMR0SHMODPERVMDTORARGS
608{
609 PGVM pGVM;
610 PGMM pGMM;
611} GMMR0SHMODPERVMDTORARGS;
612
613/**
614 * Argument packet for gmmR0CheckSharedModule.
615 */
616typedef struct GMMCHECKSHAREDMODULEINFO
617{
618 PGVM pGVM;
619 VMCPUID idCpu;
620} GMMCHECKSHAREDMODULEINFO;
621
622
623/*********************************************************************************************************************************
624* Global Variables *
625*********************************************************************************************************************************/
626/** Pointer to the GMM instance data. */
627static PGMM g_pGMM = NULL;
628
629/** Macro for obtaining and validating the g_pGMM pointer.
630 *
631 * On failure it will return from the invoking function with the specified
632 * return value.
633 *
634 * @param pGMM The name of the pGMM variable.
635 * @param rc The return value on failure. Use VERR_GMM_INSTANCE for VBox
636 * status codes.
637 */
638#define GMM_GET_VALID_INSTANCE(pGMM, rc) \
639 do { \
640 (pGMM) = g_pGMM; \
641 AssertPtrReturn((pGMM), (rc)); \
642 AssertMsgReturn((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic), (rc)); \
643 } while (0)
644
645/** Macro for obtaining and validating the g_pGMM pointer, void function
646 * variant.
647 *
648 * On failure it will return from the invoking function.
649 *
650 * @param pGMM The name of the pGMM variable.
651 */
652#define GMM_GET_VALID_INSTANCE_VOID(pGMM) \
653 do { \
654 (pGMM) = g_pGMM; \
655 AssertPtrReturnVoid((pGMM)); \
656 AssertMsgReturnVoid((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic)); \
657 } while (0)
658
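/*
 * Illustrative only (not part of the original file): typical use of the
 * instance getter macro in a ring-0 entry point.
 *
 *      PGMM pGMM;
 *      GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
 *      ... operate on pGMM, normally under the giant mutex ...
 */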
659
660/** @def GMM_CHECK_SANITY_UPON_ENTERING
661 * Checks the sanity of the GMM instance data before making changes.
662 *
663 * This macro is a stub by default and must be enabled manually in the code.
664 *
665 * @returns true if sane, false if not.
666 * @param pGMM The name of the pGMM variable.
667 */
668#if defined(VBOX_STRICT) && defined(GMMR0_WITH_SANITY_CHECK) && 0
669# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM) (RT_LIKELY(gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0))
670#else
671# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM) (true)
672#endif
673
674/** @def GMM_CHECK_SANITY_UPON_LEAVING
675 * Checks the sanity of the GMM instance data after making changes.
676 *
677 * This macro is a stub by default and must be enabled manually in the code.
678 *
679 * @returns true if sane, false if not.
680 * @param pGMM The name of the pGMM variable.
681 */
682#if defined(VBOX_STRICT) && defined(GMMR0_WITH_SANITY_CHECK) && 0
683# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
684#else
685# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM) (true)
686#endif
687
688/** @def GMM_CHECK_SANITY_IN_LOOPS
689 * Checks the sanity of the GMM instance in the allocation loops.
690 *
691 * This macro is a stub by default and must be enabled manually in the code.
692 *
693 * @returns true if sane, false if not.
694 * @param pGMM The name of the pGMM variable.
695 */
696#if defined(VBOX_STRICT) && defined(GMMR0_WITH_SANITY_CHECK) && 0
697# define GMM_CHECK_SANITY_IN_LOOPS(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
698#else
699# define GMM_CHECK_SANITY_IN_LOOPS(pGMM) (true)
700#endif
701
702
703/*********************************************************************************************************************************
704* Internal Functions *
705*********************************************************************************************************************************/
706static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM);
707static bool gmmR0CleanupVMScanChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
708DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk);
709DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet);
710DECLINLINE(void) gmmR0SelectSetAndLinkChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
711#ifdef GMMR0_WITH_SANITY_CHECK
712static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo);
713#endif
714static bool gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem);
715DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage);
716DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage);
717static int gmmR0UnmapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
718#ifdef VBOX_WITH_PAGE_SHARING
719static void gmmR0SharedModuleCleanup(PGMM pGMM, PGVM pGVM);
720# ifdef VBOX_STRICT
721static uint32_t gmmR0StrictPageChecksum(PGMM pGMM, PGVM pGVM, uint32_t idPage);
722# endif
723#endif
724
725
726
727/**
728 * Initializes the GMM component.
729 *
730 * This is called when the VMMR0.r0 module is loaded and protected by the
731 * loader semaphore.
732 *
733 * @returns VBox status code.
734 */
735GMMR0DECL(int) GMMR0Init(void)
736{
737 LogFlow(("GMMInit:\n"));
738
739 /* Currently assuming the same host and guest page size here. Can change it to
740 dish out guest pages with a different size from the host page later if
741 needed, though a restriction would be that the host page size cannot be
742 smaller than the guest page size. */
743 AssertCompile(GUEST_PAGE_SIZE == HOST_PAGE_SIZE);
744 AssertCompile(GUEST_PAGE_SIZE <= HOST_PAGE_SIZE);
745
746 /*
747 * Allocate the instance data and the locks.
748 */
749 PGMM pGMM = (PGMM)RTMemAllocZ(sizeof(*pGMM));
750 if (!pGMM)
751 return VERR_NO_MEMORY;
752
753 pGMM->u32Magic = GMM_MAGIC;
754 for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
755 pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
756 RTListInit(&pGMM->ChunkList);
757 ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
758
759#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
760 int rc = RTCritSectInit(&pGMM->GiantCritSect);
761#else
762 int rc = RTSemFastMutexCreate(&pGMM->hMtx);
763#endif
764 if (RT_SUCCESS(rc))
765 {
766 unsigned iMtx;
767 for (iMtx = 0; iMtx < RT_ELEMENTS(pGMM->aChunkMtx); iMtx++)
768 {
769 rc = RTSemFastMutexCreate(&pGMM->aChunkMtx[iMtx].hMtx);
770 if (RT_FAILURE(rc))
771 break;
772 }
773 pGMM->hSpinLockTree = NIL_RTSPINLOCK;
774 if (RT_SUCCESS(rc))
775 rc = RTSpinlockCreate(&pGMM->hSpinLockTree, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "gmm-chunk-tree");
776 pGMM->hSpinLockChunkId = NIL_RTSPINLOCK;
777 if (RT_SUCCESS(rc))
778 rc = RTSpinlockCreate(&pGMM->hSpinLockChunkId, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "gmm-chunk-id");
779 if (RT_SUCCESS(rc))
780 {
781 /*
782 * Figure out how we're going to allocate stuff (only applicable to
783 * host with linear physical memory mappings).
784 */
785 pGMM->fBoundMemoryMode = false;
786#ifdef VBOX_WITH_LINEAR_HOST_PHYS_MEM
787 pGMM->fHasWorkingAllocPhysNC = false;
788
789 RTR0MEMOBJ hMemObj;
790 rc = RTR0MemObjAllocPhysNC(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
791 if (RT_SUCCESS(rc))
792 {
793 rc = RTR0MemObjFree(hMemObj, true);
794 AssertRC(rc);
795 pGMM->fHasWorkingAllocPhysNC = true;
796 }
797 else if (rc != VERR_NOT_SUPPORTED)
798 SUPR0Printf("GMMR0Init: Warning! RTR0MemObjAllocPhysNC(, %u, NIL_RTHCPHYS) -> %d!\n", GMM_CHUNK_SIZE, rc);
799# endif
800
801 /*
802 * Query system page count and guess a reasonable cMaxPages value.
803 */
804 pGMM->cMaxPages = UINT32_MAX; /** @todo IPRT function for query ram size and such. */
805
806 /*
807 * The idFreeGeneration value should be set so we actually trigger the
808 * wrap-around invalidation handling during a typical test run.
809 */
810 pGMM->idFreeGeneration = UINT64_MAX / 4 - 128;
811
812 g_pGMM = pGMM;
813#ifdef VBOX_WITH_LINEAR_HOST_PHYS_MEM
814 LogFlow(("GMMInit: pGMM=%p fBoundMemoryMode=%RTbool fHasWorkingAllocPhysNC=%RTbool\n", pGMM, pGMM->fBoundMemoryMode, pGMM->fHasWorkingAllocPhysNC));
815#else
816 LogFlow(("GMMInit: pGMM=%p fBoundMemoryMode=%RTbool\n", pGMM, pGMM->fBoundMemoryMode));
817#endif
818 return VINF_SUCCESS;
819 }
820
821 /*
822 * Bail out.
823 */
824 RTSpinlockDestroy(pGMM->hSpinLockChunkId);
825 RTSpinlockDestroy(pGMM->hSpinLockTree);
826 while (iMtx-- > 0)
827 RTSemFastMutexDestroy(pGMM->aChunkMtx[iMtx].hMtx);
828#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
829 RTCritSectDelete(&pGMM->GiantCritSect);
830#else
831 RTSemFastMutexDestroy(pGMM->hMtx);
832#endif
833 }
834
835 pGMM->u32Magic = 0;
836 RTMemFree(pGMM);
837 SUPR0Printf("GMMR0Init: failed! rc=%d\n", rc);
838 return rc;
839}
840
841
842/**
843 * Terminates the GMM component.
844 */
845GMMR0DECL(void) GMMR0Term(void)
846{
847 LogFlow(("GMMTerm:\n"));
848
849 /*
850 * Take care / be paranoid...
851 */
852 PGMM pGMM = g_pGMM;
853 if (!RT_VALID_PTR(pGMM))
854 return;
855 if (pGMM->u32Magic != GMM_MAGIC)
856 {
857 SUPR0Printf("GMMR0Term: u32Magic=%#x\n", pGMM->u32Magic);
858 return;
859 }
860
861 /*
862 * Undo what init did and free all the resources we've acquired.
863 */
864 /* Destroy the fundamentals. */
865 g_pGMM = NULL;
866 pGMM->u32Magic = ~GMM_MAGIC;
867#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
868 RTCritSectDelete(&pGMM->GiantCritSect);
869#else
870 RTSemFastMutexDestroy(pGMM->hMtx);
871 pGMM->hMtx = NIL_RTSEMFASTMUTEX;
872#endif
873 RTSpinlockDestroy(pGMM->hSpinLockTree);
874 pGMM->hSpinLockTree = NIL_RTSPINLOCK;
875 RTSpinlockDestroy(pGMM->hSpinLockChunkId);
876 pGMM->hSpinLockChunkId = NIL_RTSPINLOCK;
877
878 /* Free any chunks still hanging around. */
879 RTAvlU32Destroy(&pGMM->pChunks, gmmR0TermDestroyChunk, pGMM);
880
881 /* Destroy the chunk locks. */
882 for (unsigned iMtx = 0; iMtx < RT_ELEMENTS(pGMM->aChunkMtx); iMtx++)
883 {
884 Assert(pGMM->aChunkMtx[iMtx].cUsers == 0);
885 RTSemFastMutexDestroy(pGMM->aChunkMtx[iMtx].hMtx);
886 pGMM->aChunkMtx[iMtx].hMtx = NIL_RTSEMFASTMUTEX;
887 }
888
889 /* Finally the instance data itself. */
890 RTMemFree(pGMM);
891 LogFlow(("GMMTerm: done\n"));
892}
893
894
895/**
896 * RTAvlU32Destroy callback.
897 *
898 * @returns 0
899 * @param pNode The node to destroy.
900 * @param pvGMM The GMM handle.
901 */
902static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM)
903{
904 PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
905
906 if (pChunk->cFree != GMM_CHUNK_NUM_PAGES)
907 SUPR0Printf("GMMR0Term: %RKv/%#x: cFree=%d cPrivate=%d cShared=%d cMappings=%d\n", pChunk,
908 pChunk->Core.Key, pChunk->cFree, pChunk->cPrivate, pChunk->cShared, pChunk->cMappingsX);
909
910 int rc = RTR0MemObjFree(pChunk->hMemObj, true /* fFreeMappings */);
911 if (RT_FAILURE(rc))
912 {
913 SUPR0Printf("GMMR0Term: %RKv/%#x: RTR0MemObjFree(%RKv,true) -> %d (cMappings=%d)\n", pChunk,
914 pChunk->Core.Key, pChunk->hMemObj, rc, pChunk->cMappingsX);
915 AssertRC(rc);
916 }
917 pChunk->hMemObj = NIL_RTR0MEMOBJ;
918
919 RTMemFree(pChunk->paMappingsX);
920 pChunk->paMappingsX = NULL;
921
922 RTMemFree(pChunk);
923 NOREF(pvGMM);
924 return 0;
925}
926
927
928/**
929 * Initializes the per-VM data for the GMM.
930 *
931 * This is called from within the GVMM lock (from GVMMR0CreateVM)
932 * and should only initialize the data members so GMMR0CleanupVM
933 * can deal with them. We reserve no memory or anything here,
934 * that's done later in GMMR0InitVM.
935 *
936 * @param pGVM Pointer to the Global VM structure.
937 */
938GMMR0DECL(int) GMMR0InitPerVMData(PGVM pGVM)
939{
940 AssertCompile(RT_SIZEOFMEMB(GVM,gmm.s) <= RT_SIZEOFMEMB(GVM,gmm.padding));
941
942 pGVM->gmm.s.Stats.enmPolicy = GMMOCPOLICY_INVALID;
943 pGVM->gmm.s.Stats.enmPriority = GMMPRIORITY_INVALID;
944 pGVM->gmm.s.Stats.fMayAllocate = false;
945
946 pGVM->gmm.s.hChunkTlbSpinLock = NIL_RTSPINLOCK;
947 int rc = RTSpinlockCreate(&pGVM->gmm.s.hChunkTlbSpinLock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "per-vm-chunk-tlb");
948 AssertRCReturn(rc, rc);
949
950 return VINF_SUCCESS;
951}
952
953
954/**
955 * Acquires the GMM giant lock.
956 *
957 * @returns Assert status code from RTSemFastMutexRequest.
958 * @param pGMM Pointer to the GMM instance.
959 */
960static int gmmR0MutexAcquire(PGMM pGMM)
961{
962 ASMAtomicIncU32(&pGMM->cMtxContenders);
963#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
964 int rc = RTCritSectEnter(&pGMM->GiantCritSect);
965#else
966 int rc = RTSemFastMutexRequest(pGMM->hMtx);
967#endif
968 ASMAtomicDecU32(&pGMM->cMtxContenders);
969 AssertRC(rc);
970#ifdef VBOX_STRICT
971 pGMM->hMtxOwner = RTThreadNativeSelf();
972#endif
973 return rc;
974}
975
976
977/**
978 * Releases the GMM giant lock.
979 *
980 * @returns Assert status code from RTSemFastMutexRelease.
981 * @param pGMM Pointer to the GMM instance.
982 */
983static int gmmR0MutexRelease(PGMM pGMM)
984{
985#ifdef VBOX_STRICT
986 pGMM->hMtxOwner = NIL_RTNATIVETHREAD;
987#endif
988#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
989 int rc = RTCritSectLeave(&pGMM->GiantCritSect);
990#else
991 int rc = RTSemFastMutexRelease(pGMM->hMtx);
992 AssertRC(rc);
993#endif
994 return rc;
995}
996
997
998/**
999 * Yields the GMM giant lock if there is contention and a certain minimum time
1000 * has elapsed since we took it.
1001 *
1002 * @returns @c true if the mutex was yielded, @c false if not.
1003 * @param pGMM Pointer to the GMM instance.
1004 * @param puLockNanoTS Where the lock acquisition time stamp is kept
1005 * (in/out).
1006 */
1007static bool gmmR0MutexYield(PGMM pGMM, uint64_t *puLockNanoTS)
1008{
1009 /*
1010 * If nobody is contending the mutex, don't bother checking the time.
1011 */
1012 if (ASMAtomicReadU32(&pGMM->cMtxContenders) == 0)
1013 return false;
1014
1015 /*
1016 * Don't yield if we haven't executed for at least 2 milliseconds.
1017 */
1018 uint64_t uNanoNow = RTTimeSystemNanoTS();
1019 if (uNanoNow - *puLockNanoTS < UINT32_C(2000000))
1020 return false;
1021
1022 /*
1023 * Yield the mutex.
1024 */
1025#ifdef VBOX_STRICT
1026 pGMM->hMtxOwner = NIL_RTNATIVETHREAD;
1027#endif
1028 ASMAtomicIncU32(&pGMM->cMtxContenders);
1029#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
1030 int rc1 = RTCritSectLeave(&pGMM->GiantCritSect); AssertRC(rc1);
1031#else
1032 int rc1 = RTSemFastMutexRelease(pGMM->hMtx); AssertRC(rc1);
1033#endif
1034
1035 RTThreadYield();
1036
1037#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
1038 int rc2 = RTCritSectEnter(&pGMM->GiantCritSect); AssertRC(rc2);
1039#else
1040 int rc2 = RTSemFastMutexRequest(pGMM->hMtx); AssertRC(rc2);
1041#endif
1042 *puLockNanoTS = RTTimeSystemNanoTS();
1043 ASMAtomicDecU32(&pGMM->cMtxContenders);
1044#ifdef VBOX_STRICT
1045 pGMM->hMtxOwner = RTThreadNativeSelf();
1046#endif
1047
1048 return true;
1049}
1050
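/*
 * Illustrative only (not part of the original file): the pattern a long
 * running scan uses with the yield helper above, in the style of
 * GMMR0CleanupVM further down.
 *
 *      uint64_t uLockNanoTS = RTTimeSystemNanoTS();
 *      unsigned iCountDown  = 64;
 *      RTListForEachReverse(&pGMM->ChunkList, pChunk, GMMCHUNK, ListNode)
 *      {
 *          ... per chunk work ...
 *          if (!iCountDown--)
 *          {
 *              gmmR0MutexYield(pGMM, &uLockNanoTS);
 *              iCountDown = 64;
 *          }
 *      }
 */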
1051
1052/**
1053 * Acquires a chunk lock.
1054 *
1055 * The caller must own the giant lock.
1056 *
1057 * @returns Assert status code from RTSemFastMutexRequest.
1058 * @param pMtxState The chunk mutex state info. (Avoids
1059 * passing the same flags and stuff around
1060 * for subsequent release and drop-giant
1061 * calls.)
1062 * @param pGMM Pointer to the GMM instance.
1063 * @param pChunk Pointer to the chunk.
1064 * @param fFlags Flags regarding the giant lock, GMMR0CHUNK_MTX_XXX.
1065 */
1066static int gmmR0ChunkMutexAcquire(PGMMR0CHUNKMTXSTATE pMtxState, PGMM pGMM, PGMMCHUNK pChunk, uint32_t fFlags)
1067{
1068 Assert(fFlags > GMMR0CHUNK_MTX_INVALID && fFlags < GMMR0CHUNK_MTX_END);
1069 Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
1070
1071 pMtxState->pGMM = pGMM;
1072 pMtxState->fFlags = (uint8_t)fFlags;
1073
1074 /*
1075 * Get the lock index and reference the lock.
1076 */
1077 Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
1078 uint32_t iChunkMtx = pChunk->iChunkMtx;
1079 if (iChunkMtx == UINT8_MAX)
1080 {
1081 iChunkMtx = pGMM->iNextChunkMtx++;
1082 iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
1083
1084 /* Try get an unused one... */
1085 if (pGMM->aChunkMtx[iChunkMtx].cUsers)
1086 {
1087 iChunkMtx = pGMM->iNextChunkMtx++;
1088 iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
1089 if (pGMM->aChunkMtx[iChunkMtx].cUsers)
1090 {
1091 iChunkMtx = pGMM->iNextChunkMtx++;
1092 iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
1093 if (pGMM->aChunkMtx[iChunkMtx].cUsers)
1094 {
1095 iChunkMtx = pGMM->iNextChunkMtx++;
1096 iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
1097 }
1098 }
1099 }
1100
1101 pChunk->iChunkMtx = iChunkMtx;
1102 }
1103 AssertCompile(RT_ELEMENTS(pGMM->aChunkMtx) < UINT8_MAX);
1104 pMtxState->iChunkMtx = (uint8_t)iChunkMtx;
1105 ASMAtomicIncU32(&pGMM->aChunkMtx[iChunkMtx].cUsers);
1106
1107 /*
1108 * Drop the giant?
1109 */
1110 if (fFlags != GMMR0CHUNK_MTX_KEEP_GIANT)
1111 {
1112 /** @todo GMM life cycle cleanup (we may race someone
1113 * destroying and cleaning up GMM)? */
1114 gmmR0MutexRelease(pGMM);
1115 }
1116
1117 /*
1118 * Take the chunk mutex.
1119 */
1120 int rc = RTSemFastMutexRequest(pGMM->aChunkMtx[iChunkMtx].hMtx);
1121 AssertRC(rc);
1122 return rc;
1123}
1124
1125
1126/**
1127 * Releases the chunk mutex acquired by gmmR0ChunkMutexAcquire.
1128 *
1129 * @returns Assert status code from RTSemFastMutexRelease.
1130 * @param pMtxState Pointer to the chunk mutex state.
1131 * @param pChunk Pointer to the chunk if it's still
1132 * alive, NULL if it isn't. This is used to deassociate
1133 * the chunk from the mutex on the way out so a new one
1134 * can be selected next time, thus avoiding contended
1135 * mutexes.
1136 */
1137static int gmmR0ChunkMutexRelease(PGMMR0CHUNKMTXSTATE pMtxState, PGMMCHUNK pChunk)
1138{
1139 PGMM pGMM = pMtxState->pGMM;
1140
1141 /*
1142 * Release the chunk mutex and reacquire the giant if requested.
1143 */
1144 int rc = RTSemFastMutexRelease(pGMM->aChunkMtx[pMtxState->iChunkMtx].hMtx);
1145 AssertRC(rc);
1146 if (pMtxState->fFlags == GMMR0CHUNK_MTX_RETAKE_GIANT)
1147 rc = gmmR0MutexAcquire(pGMM);
1148 else
1149 Assert((pMtxState->fFlags != GMMR0CHUNK_MTX_DROP_GIANT) == (pGMM->hMtxOwner == RTThreadNativeSelf()));
1150
1151 /*
1152 * Drop the chunk mutex user reference and deassociate it from the chunk
1153 * when possible.
1154 */
1155 if ( ASMAtomicDecU32(&pGMM->aChunkMtx[pMtxState->iChunkMtx].cUsers) == 0
1156 && pChunk
1157 && RT_SUCCESS(rc) )
1158 {
1159 if (pMtxState->fFlags != GMMR0CHUNK_MTX_DROP_GIANT)
1160 pChunk->iChunkMtx = UINT8_MAX;
1161 else
1162 {
1163 rc = gmmR0MutexAcquire(pGMM);
1164 if (RT_SUCCESS(rc))
1165 {
1166 if (pGMM->aChunkMtx[pMtxState->iChunkMtx].cUsers == 0)
1167 pChunk->iChunkMtx = UINT8_MAX;
1168 rc = gmmR0MutexRelease(pGMM);
1169 }
1170 }
1171 }
1172
1173 pMtxState->pGMM = NULL;
1174 return rc;
1175}
1176
1177
1178/**
1179 * Drops the giant GMM lock we kept in gmmR0ChunkMutexAcquire while keeping the
1180 * chunk locked.
1181 *
1182 * This only works if gmmR0ChunkMutexAcquire was called with
1183 * GMMR0CHUNK_MTX_KEEP_GIANT. gmmR0ChunkMutexRelease will retake the giant
1184 * mutex, i.e. behave as if GMMR0CHUNK_MTX_RETAKE_GIANT was used.
1185 *
1186 * @returns VBox status code (assuming success is ok).
1187 * @param pMtxState Pointer to the chunk mutex state.
1188 */
1189static int gmmR0ChunkMutexDropGiant(PGMMR0CHUNKMTXSTATE pMtxState)
1190{
1191 AssertReturn(pMtxState->fFlags == GMMR0CHUNK_MTX_KEEP_GIANT, VERR_GMM_MTX_FLAGS);
1192 Assert(pMtxState->pGMM->hMtxOwner == RTThreadNativeSelf());
1193 pMtxState->fFlags = GMMR0CHUNK_MTX_RETAKE_GIANT;
1194 /** @todo GMM life cycle cleanup (we may race someone
1195 * destroying and cleaning up GMM)? */
1196 return gmmR0MutexRelease(pMtxState->pGMM);
1197}
1198
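/*
 * Illustrative only (not part of the original file): the usual pairing of
 * the chunk mutex helpers above, as used by gmmR0CleanupVMScanChunk and the
 * mapping code.
 *
 *      GMMR0CHUNKMTXSTATE MtxState;
 *      gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
 *      ... access pChunk->paMappingsX / cMappingsX ...
 *      gmmR0ChunkMutexRelease(&MtxState, pChunk);
 */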
1199
1200/**
1201 * For experimenting with NUMA affinity and such.
1202 *
1203 * @returns The current NUMA Node ID.
1204 */
1205static uint16_t gmmR0GetCurrentNumaNodeId(void)
1206{
1207#if 1
1208 return GMM_CHUNK_NUMA_ID_UNKNOWN;
1209#else
1210 return RTMpCpuId() / 16;
1211#endif
1212}
1213
1214
1215
1216/**
1217 * Cleans up when a VM is terminating.
1218 *
1219 * @param pGVM Pointer to the Global VM structure.
1220 */
1221GMMR0DECL(void) GMMR0CleanupVM(PGVM pGVM)
1222{
1223 LogFlow(("GMMR0CleanupVM: pGVM=%p:{.hSelf=%#x}\n", pGVM, pGVM->hSelf));
1224
1225 PGMM pGMM;
1226 GMM_GET_VALID_INSTANCE_VOID(pGMM);
1227
1228#ifdef VBOX_WITH_PAGE_SHARING
1229 /*
1230 * Clean up all registered shared modules first.
1231 */
1232 gmmR0SharedModuleCleanup(pGMM, pGVM);
1233#endif
1234
1235 gmmR0MutexAcquire(pGMM);
1236 uint64_t uLockNanoTS = RTTimeSystemNanoTS();
1237 GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
1238
1239 /*
1240 * The policy is 'INVALID' until the initial reservation
1241 * request has been serviced.
1242 */
1243 if ( pGVM->gmm.s.Stats.enmPolicy > GMMOCPOLICY_INVALID
1244 && pGVM->gmm.s.Stats.enmPolicy < GMMOCPOLICY_END)
1245 {
1246 /*
1247 * If it's the last VM around, we can skip walking all the chunks looking
1248 * for the pages owned by this VM and instead flush the whole shebang.
1249 *
1250 * This takes care of the eventuality that a VM has left shared page
1251 * references behind (shouldn't happen of course, but you never know).
1252 */
1253 Assert(pGMM->cRegisteredVMs);
1254 pGMM->cRegisteredVMs--;
1255
1256 /*
1257 * Walk the entire pool looking for pages that belong to this VM
1258 * and leftover mappings. (This'll only catch private pages,
1259 * shared pages will be 'left behind'.)
1260 */
1261 /** @todo r=bird: This scanning+freeing could be optimized in bound mode! */
1262 uint64_t cPrivatePages = pGVM->gmm.s.Stats.cPrivatePages; /* save */
1263
1264 unsigned iCountDown = 64;
1265 bool fRedoFromStart;
1266 PGMMCHUNK pChunk;
1267 do
1268 {
1269 fRedoFromStart = false;
1270 RTListForEachReverse(&pGMM->ChunkList, pChunk, GMMCHUNK, ListNode)
1271 {
1272 uint32_t const cFreeChunksOld = pGMM->cFreedChunks;
1273 if ( ( !pGMM->fBoundMemoryMode
1274 || pChunk->hGVM == pGVM->hSelf)
1275 && gmmR0CleanupVMScanChunk(pGMM, pGVM, pChunk))
1276 {
1277 /* We left the giant mutex, so reset the yield counters. */
1278 uLockNanoTS = RTTimeSystemNanoTS();
1279 iCountDown = 64;
1280 }
1281 else
1282 {
1283 /* Didn't leave it, so do normal yielding. */
1284 if (!iCountDown)
1285 gmmR0MutexYield(pGMM, &uLockNanoTS);
1286 else
1287 iCountDown--;
1288 }
1289 if (pGMM->cFreedChunks != cFreeChunksOld)
1290 {
1291 fRedoFromStart = true;
1292 break;
1293 }
1294 }
1295 } while (fRedoFromStart);
1296
1297 if (pGVM->gmm.s.Stats.cPrivatePages)
1298 SUPR0Printf("GMMR0CleanupVM: hGVM=%#x has %#x private pages that cannot be found!\n", pGVM->hSelf, pGVM->gmm.s.Stats.cPrivatePages);
1299
1300 pGMM->cAllocatedPages -= cPrivatePages;
1301
1302 /*
1303 * Free empty chunks.
1304 */
1305 PGMMCHUNKFREESET pPrivateSet = pGMM->fBoundMemoryMode ? &pGVM->gmm.s.Private : &pGMM->PrivateX;
1306 do
1307 {
1308 fRedoFromStart = false;
1309 iCountDown = 10240;
1310 pChunk = pPrivateSet->apLists[GMM_CHUNK_FREE_SET_UNUSED_LIST];
1311 while (pChunk)
1312 {
1313 PGMMCHUNK pNext = pChunk->pFreeNext;
1314 Assert(pChunk->cFree == GMM_CHUNK_NUM_PAGES);
1315 if ( !pGMM->fBoundMemoryMode
1316 || pChunk->hGVM == pGVM->hSelf)
1317 {
1318 uint64_t const idGenerationOld = pPrivateSet->idGeneration;
1319 if (gmmR0FreeChunk(pGMM, pGVM, pChunk, true /*fRelaxedSem*/))
1320 {
1321 /* We've left the giant mutex, restart? (+1 for our unlink) */
1322 fRedoFromStart = pPrivateSet->idGeneration != idGenerationOld + 1;
1323 if (fRedoFromStart)
1324 break;
1325 uLockNanoTS = RTTimeSystemNanoTS();
1326 iCountDown = 10240;
1327 }
1328 }
1329
1330 /* Advance and maybe yield the lock. */
1331 pChunk = pNext;
1332 if (--iCountDown == 0)
1333 {
1334 uint64_t const idGenerationOld = pPrivateSet->idGeneration;
1335 fRedoFromStart = gmmR0MutexYield(pGMM, &uLockNanoTS)
1336 && pPrivateSet->idGeneration != idGenerationOld;
1337 if (fRedoFromStart)
1338 break;
1339 iCountDown = 10240;
1340 }
1341 }
1342 } while (fRedoFromStart);
1343
1344 /*
1345 * Account for shared pages that weren't freed.
1346 */
1347 if (pGVM->gmm.s.Stats.cSharedPages)
1348 {
1349 Assert(pGMM->cSharedPages >= pGVM->gmm.s.Stats.cSharedPages);
1350 SUPR0Printf("GMMR0CleanupVM: hGVM=%#x left %#x shared pages behind!\n", pGVM->hSelf, pGVM->gmm.s.Stats.cSharedPages);
1351 pGMM->cLeftBehindSharedPages += pGVM->gmm.s.Stats.cSharedPages;
1352 }
1353
1354 /*
1355 * Clean up balloon statistics in case the VM process crashed.
1356 */
1357 Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.Stats.cBalloonedPages);
1358 pGMM->cBalloonedPages -= pGVM->gmm.s.Stats.cBalloonedPages;
1359
1360 /*
1361 * Update the over-commitment management statistics.
1362 */
1363 pGMM->cReservedPages -= pGVM->gmm.s.Stats.Reserved.cBasePages
1364 + pGVM->gmm.s.Stats.Reserved.cFixedPages
1365 + pGVM->gmm.s.Stats.Reserved.cShadowPages;
1366 switch (pGVM->gmm.s.Stats.enmPolicy)
1367 {
1368 case GMMOCPOLICY_NO_OC:
1369 break;
1370 default:
1371 /** @todo Update GMM->cOverCommittedPages */
1372 break;
1373 }
1374 }
1375
1376 /* zap the GVM data. */
1377 pGVM->gmm.s.Stats.enmPolicy = GMMOCPOLICY_INVALID;
1378 pGVM->gmm.s.Stats.enmPriority = GMMPRIORITY_INVALID;
1379 pGVM->gmm.s.Stats.fMayAllocate = false;
1380
1381 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1382 gmmR0MutexRelease(pGMM);
1383
1384 /*
1385 * Destroy the spinlock.
1386 */
1387 RTSPINLOCK hSpinlock = NIL_RTSPINLOCK;
1388 ASMAtomicXchgHandle(&pGVM->gmm.s.hChunkTlbSpinLock, NIL_RTSPINLOCK, &hSpinlock);
1389 RTSpinlockDestroy(hSpinlock);
1390
1391 LogFlow(("GMMR0CleanupVM: returns\n"));
1392}
1393
1394
1395/**
1396 * Scan one chunk for private pages belonging to the specified VM.
1397 *
1398 * @note This function may drop the giant mutex!
1399 *
1400 * @returns @c true if we've temporarily dropped the giant mutex, @c false if
1401 * we didn't.
1402 * @param pGMM Pointer to the GMM instance.
1403 * @param pGVM The global VM handle.
1404 * @param pChunk The chunk to scan.
1405 */
1406static bool gmmR0CleanupVMScanChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
1407{
1408 Assert(!pGMM->fBoundMemoryMode || pChunk->hGVM == pGVM->hSelf);
1409
1410 /*
1411 * Look for pages belonging to the VM.
1412 * (Perform some internal checks while we're scanning.)
1413 */
1414#ifndef VBOX_STRICT
1415 if (pChunk->cFree != GMM_CHUNK_NUM_PAGES)
1416#endif
1417 {
1418 unsigned cPrivate = 0;
1419 unsigned cShared = 0;
1420 unsigned cFree = 0;
1421
1422 gmmR0UnlinkChunk(pChunk); /* avoiding cFreePages updates. */
1423
1424 uint16_t hGVM = pGVM->hSelf;
1425 unsigned iPage = (GMM_CHUNK_SIZE >> GUEST_PAGE_SHIFT);
1426 while (iPage-- > 0)
1427 if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
1428 {
1429 if (pChunk->aPages[iPage].Private.hGVM == hGVM)
1430 {
1431 /*
1432 * Free the page.
1433 *
1434 * The reason for not using gmmR0FreePrivatePage here is that we
1435 * must *not* cause the chunk to be freed from under us - we're in
1436 * an AVL tree walk here.
1437 */
1438 pChunk->aPages[iPage].u = 0;
1439 pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
1440 pChunk->aPages[iPage].Free.fZeroed = false;
1441 pChunk->aPages[iPage].Free.iNext = pChunk->iFreeHead;
1442 pChunk->iFreeHead = iPage;
1443 pChunk->cPrivate--;
1444 pChunk->cFree++;
1445 pGVM->gmm.s.Stats.cPrivatePages--;
1446 cFree++;
1447 }
1448 else
1449 cPrivate++;
1450 }
1451 else if (GMM_PAGE_IS_FREE(&pChunk->aPages[iPage]))
1452 cFree++;
1453 else
1454 cShared++;
1455
1456 gmmR0SelectSetAndLinkChunk(pGMM, pGVM, pChunk);
1457
1458 /*
1459 * Did it add up?
1460 */
1461 if (RT_UNLIKELY( pChunk->cFree != cFree
1462 || pChunk->cPrivate != cPrivate
1463 || pChunk->cShared != cShared))
1464 {
1465 SUPR0Printf("gmmR0CleanupVMScanChunk: Chunk %RKv/%#x has bogus stats - free=%d/%d private=%d/%d shared=%d/%d\n",
1466 pChunk, pChunk->Core.Key, pChunk->cFree, cFree, pChunk->cPrivate, cPrivate, pChunk->cShared, cShared);
1467 pChunk->cFree = cFree;
1468 pChunk->cPrivate = cPrivate;
1469 pChunk->cShared = cShared;
1470 }
1471 }
1472
1473 /*
1474 * If not in bound memory mode, we should reset the hGVM field
1475 * if it has our handle in it.
1476 */
1477 if (pChunk->hGVM == pGVM->hSelf)
1478 {
1479 if (!g_pGMM->fBoundMemoryMode)
1480 pChunk->hGVM = NIL_GVM_HANDLE;
1481 else if (pChunk->cFree != GMM_CHUNK_NUM_PAGES)
1482 {
1483 SUPR0Printf("gmmR0CleanupVMScanChunk: %RKv/%#x: cFree=%#x - it should be 0 in bound mode!\n",
1484 pChunk, pChunk->Core.Key, pChunk->cFree);
1485 AssertMsgFailed(("%p/%#x: cFree=%#x - it should be 0 in bound mode!\n", pChunk, pChunk->Core.Key, pChunk->cFree));
1486
1487 gmmR0UnlinkChunk(pChunk);
1488 pChunk->cFree = GMM_CHUNK_NUM_PAGES;
1489 gmmR0SelectSetAndLinkChunk(pGMM, pGVM, pChunk);
1490 }
1491 }
1492
1493 /*
1494 * Look for a mapping belonging to the terminating VM.
1495 */
1496 GMMR0CHUNKMTXSTATE MtxState;
1497 gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
1498 unsigned cMappings = pChunk->cMappingsX;
1499 for (unsigned i = 0; i < cMappings; i++)
1500 if (pChunk->paMappingsX[i].pGVM == pGVM)
1501 {
1502 gmmR0ChunkMutexDropGiant(&MtxState);
1503
1504 RTR0MEMOBJ hMemObj = pChunk->paMappingsX[i].hMapObj;
1505
1506 cMappings--;
1507 if (i < cMappings)
1508 pChunk->paMappingsX[i] = pChunk->paMappingsX[cMappings];
1509 pChunk->paMappingsX[cMappings].pGVM = NULL;
1510 pChunk->paMappingsX[cMappings].hMapObj = NIL_RTR0MEMOBJ;
1511 Assert(pChunk->cMappingsX - 1U == cMappings);
1512 pChunk->cMappingsX = cMappings;
1513
1514 int rc = RTR0MemObjFree(hMemObj, false /* fFreeMappings (NA) */);
1515 if (RT_FAILURE(rc))
1516 {
1517 SUPR0Printf("gmmR0CleanupVMScanChunk: %RKv/%#x: mapping #%x: RTR0MemObjFree(%RKv,false) -> %d\n",
1518 pChunk, pChunk->Core.Key, i, hMemObj, rc);
1519 AssertRC(rc);
1520 }
1521
1522 gmmR0ChunkMutexRelease(&MtxState, pChunk);
1523 return true;
1524 }
1525
1526 gmmR0ChunkMutexRelease(&MtxState, pChunk);
1527 return false;
1528}
1529
1530
1531/**
1532 * The initial resource reservations.
1533 *
1534 * This will make memory reservations according to policy and priority. If there aren't
1535 * sufficient resources available to sustain the VM this function will fail and all
1536 * future allocation requests will fail as well.
1537 *
1538 * These are just the initial reservations made very early during the VM creation
1539 * process and will be adjusted later in the GMMR0UpdateReservation call after the
1540 * ring-3 init has completed.
1541 *
1542 * @returns VBox status code.
1543 * @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1544 * @retval VERR_GMM_
1545 *
1546 * @param pGVM The global (ring-0) VM structure.
1547 * @param idCpu The VCPU id - must be zero.
1548 * @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1549 * This does not include MMIO2 and similar.
1550 * @param cShadowPages The number of pages that may be allocated for shadow paging structures.
1551 * @param cFixedPages The number of pages that may be allocated for fixed objects like the
1552 * hyper heap, MMIO2 and similar.
1553 * @param enmPolicy The OC policy to use on this VM.
1554 * @param enmPriority The priority in an out-of-memory situation.
1555 *
1556 * @thread The creator thread / EMT(0).
1557 */
1558GMMR0DECL(int) GMMR0InitialReservation(PGVM pGVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages,
1559 uint32_t cFixedPages, GMMOCPOLICY enmPolicy, GMMPRIORITY enmPriority)
1560{
1561 LogFlow(("GMMR0InitialReservation: pGVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x enmPolicy=%d enmPriority=%d\n",
1562 pGVM, cBasePages, cShadowPages, cFixedPages, enmPolicy, enmPriority));
1563
1564 /*
1565 * Validate, get basics and take the semaphore.
1566 */
1567 AssertReturn(idCpu == 0, VERR_INVALID_CPU_ID);
1568 PGMM pGMM;
1569 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
1570 int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
1571 if (RT_FAILURE(rc))
1572 return rc;
1573
1574 AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1575 AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1576 AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1577 AssertReturn(enmPolicy > GMMOCPOLICY_INVALID && enmPolicy < GMMOCPOLICY_END, VERR_INVALID_PARAMETER);
1578 AssertReturn(enmPriority > GMMPRIORITY_INVALID && enmPriority < GMMPRIORITY_END, VERR_INVALID_PARAMETER);
1579
1580 gmmR0MutexAcquire(pGMM);
1581 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1582 {
1583 if ( !pGVM->gmm.s.Stats.Reserved.cBasePages
1584 && !pGVM->gmm.s.Stats.Reserved.cFixedPages
1585 && !pGVM->gmm.s.Stats.Reserved.cShadowPages)
1586 {
1587 /*
1588 * Check if we can accommodate this.
1589 */
1590 /* ... later ... */
1591 if (RT_SUCCESS(rc))
1592 {
1593 /*
1594 * Update the records.
1595 */
1596 pGVM->gmm.s.Stats.Reserved.cBasePages = cBasePages;
1597 pGVM->gmm.s.Stats.Reserved.cFixedPages = cFixedPages;
1598 pGVM->gmm.s.Stats.Reserved.cShadowPages = cShadowPages;
1599 pGVM->gmm.s.Stats.enmPolicy = enmPolicy;
1600 pGVM->gmm.s.Stats.enmPriority = enmPriority;
1601 pGVM->gmm.s.Stats.fMayAllocate = true;
1602
1603 pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1604 pGMM->cRegisteredVMs++;
1605 }
1606 }
1607 else
1608 rc = VERR_WRONG_ORDER;
1609 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1610 }
1611 else
1612 rc = VERR_GMM_IS_NOT_SANE;
1613 gmmR0MutexRelease(pGMM);
1614 LogFlow(("GMMR0InitialReservation: returns %Rrc\n", rc));
1615 return rc;
1616}
1617
1618
1619/**
1620 * VMMR0 request wrapper for GMMR0InitialReservation.
1621 *
1622 * @returns see GMMR0InitialReservation.
1623 * @param pGVM The global (ring-0) VM structure.
1624 * @param idCpu The VCPU id.
1625 * @param pReq Pointer to the request packet.
1626 */
1627GMMR0DECL(int) GMMR0InitialReservationReq(PGVM pGVM, VMCPUID idCpu, PGMMINITIALRESERVATIONREQ pReq)
1628{
1629 /*
1630 * Validate input and pass it on.
1631 */
1632 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1633 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1634 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
1635
1636 return GMMR0InitialReservation(pGVM, idCpu, pReq->cBasePages, pReq->cShadowPages,
1637 pReq->cFixedPages, pReq->enmPolicy, pReq->enmPriority);
1638}
1639
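/*
 * Illustrative only (not part of the original file): how a ring-3 caller
 * might fill the request packet handled above before passing it to ring-0
 * (the page counts and cGuestRamPages are placeholders).
 *
 *      GMMINITIALRESERVATIONREQ Req;
 *      Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
 *      Req.Hdr.cbReq    = sizeof(Req);
 *      Req.cBasePages   = cGuestRamPages;     // placeholder
 *      Req.cShadowPages = 1;
 *      Req.cFixedPages  = 1;
 *      Req.enmPolicy    = GMMOCPOLICY_NO_OC;
 *      Req.enmPriority  = GMMPRIORITY_NORMAL;
 */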
1640
1641/**
1642 * This updates the memory reservation with the additional MMIO2 and ROM pages.
1643 *
1644 * @returns VBox status code.
1645 * @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1646 *
1647 * @param pGVM The global (ring-0) VM structure.
1648 * @param idCpu The VCPU id.
1649 * @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1650 * This does not include MMIO2 and similar.
1651 * @param cShadowPages The number of pages that may be allocated for shadow paging structures.
1652 * @param cFixedPages The number of pages that may be allocated for fixed objects like the
1653 * hyper heap, MMIO2 and similar.
1654 *
1655 * @thread EMT(idCpu)
1656 */
1657GMMR0DECL(int) GMMR0UpdateReservation(PGVM pGVM, VMCPUID idCpu, uint64_t cBasePages,
1658 uint32_t cShadowPages, uint32_t cFixedPages)
1659{
1660 LogFlow(("GMMR0UpdateReservation: pGVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x\n",
1661 pGVM, cBasePages, cShadowPages, cFixedPages));
1662
1663 /*
1664 * Validate, get basics and take the semaphore.
1665 */
1666 PGMM pGMM;
1667 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
1668 int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
1669 if (RT_FAILURE(rc))
1670 return rc;
1671
1672 AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1673 AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1674 AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1675
1676 gmmR0MutexAcquire(pGMM);
1677 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1678 {
1679 if ( pGVM->gmm.s.Stats.Reserved.cBasePages
1680 && pGVM->gmm.s.Stats.Reserved.cFixedPages
1681 && pGVM->gmm.s.Stats.Reserved.cShadowPages)
1682 {
1683 /*
1684 * Check if we can accommodate this.
1685 */
1686 /* ... later ... */
1687 if (RT_SUCCESS(rc))
1688 {
1689 /*
1690 * Update the records.
1691 */
1692 pGMM->cReservedPages -= pGVM->gmm.s.Stats.Reserved.cBasePages
1693 + pGVM->gmm.s.Stats.Reserved.cFixedPages
1694 + pGVM->gmm.s.Stats.Reserved.cShadowPages;
1695 pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1696
1697 pGVM->gmm.s.Stats.Reserved.cBasePages = cBasePages;
1698 pGVM->gmm.s.Stats.Reserved.cFixedPages = cFixedPages;
1699 pGVM->gmm.s.Stats.Reserved.cShadowPages = cShadowPages;
1700 }
1701 }
1702 else
1703 rc = VERR_WRONG_ORDER;
1704 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1705 }
1706 else
1707 rc = VERR_GMM_IS_NOT_SANE;
1708 gmmR0MutexRelease(pGMM);
1709 LogFlow(("GMMR0UpdateReservation: returns %Rrc\n", rc));
1710 return rc;
1711}
1712
1713
1714/**
1715 * VMMR0 request wrapper for GMMR0UpdateReservation.
1716 *
1717 * @returns see GMMR0UpdateReservation.
1718 * @param pGVM The global (ring-0) VM structure.
1719 * @param idCpu The VCPU id.
1720 * @param pReq Pointer to the request packet.
1721 */
1722GMMR0DECL(int) GMMR0UpdateReservationReq(PGVM pGVM, VMCPUID idCpu, PGMMUPDATERESERVATIONREQ pReq)
1723{
1724 /*
1725 * Validate input and pass it on.
1726 */
1727 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1728 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
1729
1730 return GMMR0UpdateReservation(pGVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages);
1731}
1732
1733#ifdef GMMR0_WITH_SANITY_CHECK
1734
1735/**
1736 * Performs sanity checks on a free set.
1737 *
1738 * @returns Error count.
1739 *
1740 * @param pGMM Pointer to the GMM instance.
1741 * @param pSet Pointer to the set.
1742 * @param pszSetName The set name.
1743 * @param pszFunction The function from which it was called.
1744 * @param   uLineNo         The line number.
1745 */
1746static uint32_t gmmR0SanityCheckSet(PGMM pGMM, PGMMCHUNKFREESET pSet, const char *pszSetName,
1747 const char *pszFunction, unsigned uLineNo)
1748{
1749 uint32_t cErrors = 0;
1750
1751 /*
1752 * Count the free pages in all the chunks and match it against pSet->cFreePages.
1753 */
1754 uint32_t cPages = 0;
1755 for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1756 {
1757 for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1758 {
1759            /** @todo check that the chunk is hashed into the right set. */
1760 cPages += pCur->cFree;
1761 }
1762 }
1763 if (RT_UNLIKELY(cPages != pSet->cFreePages))
1764 {
1765 SUPR0Printf("GMM insanity: found %#x pages in the %s set, expected %#x. (%s, line %u)\n",
1766 cPages, pszSetName, pSet->cFreePages, pszFunction, uLineNo);
1767 cErrors++;
1768 }
1769
1770 return cErrors;
1771}
1772
1773
1774/**
1775 * Performs some sanity checks on the GMM while owning the lock.
1776 *
1777 * @returns Error count.
1778 *
1779 * @param pGMM Pointer to the GMM instance.
1780 * @param pszFunction The function from which it is called.
1781 * @param uLineNo The line number.
1782 */
1783static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo)
1784{
1785 uint32_t cErrors = 0;
1786
1787 cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->PrivateX, "private", pszFunction, uLineNo);
1788 cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Shared, "shared", pszFunction, uLineNo);
1789 /** @todo add more sanity checks. */
1790
1791 return cErrors;
1792}
1793
1794#endif /* GMMR0_WITH_SANITY_CHECK */
1795
1796/**
1797 * Looks up a chunk in the tree and fills in the TLB entry for it.
1798 *
1799 * This is not expected to fail and will bitch if it does.
1800 *
1801 * @returns Pointer to the allocation chunk, NULL if not found.
1802 * @param pGMM Pointer to the GMM instance.
1803 * @param idChunk The ID of the chunk to find.
1804 * @param pTlbe Pointer to the TLB entry.
1805 *
1806 * @note Caller owns spinlock.
1807 */
1808static PGMMCHUNK gmmR0GetChunkSlow(PGMM pGMM, uint32_t idChunk, PGMMCHUNKTLBE pTlbe)
1809{
1810 PGMMCHUNK pChunk = (PGMMCHUNK)RTAvlU32Get(&pGMM->pChunks, idChunk);
1811 AssertMsgReturn(pChunk, ("Chunk %#x not found!\n", idChunk), NULL);
1812 pTlbe->idChunk = idChunk;
1813 pTlbe->pChunk = pChunk;
1814 return pChunk;
1815}
1816
1817
1818/**
1819 * Finds an allocation chunk, spin-locked.
1820 *
1821 * This is not expected to fail and will bitch if it does.
1822 *
1823 * @returns Pointer to the allocation chunk, NULL if not found.
1824 * @param pGMM Pointer to the GMM instance.
1825 * @param idChunk The ID of the chunk to find.
1826 */
1827DECLINLINE(PGMMCHUNK) gmmR0GetChunkLocked(PGMM pGMM, uint32_t idChunk)
1828{
1829 /*
1830 * Do a TLB lookup, branch if not in the TLB.
1831 */
1832 PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(idChunk)];
1833 PGMMCHUNK pChunk = pTlbe->pChunk;
1834 if ( pChunk == NULL
1835 || pTlbe->idChunk != idChunk)
1836 pChunk = gmmR0GetChunkSlow(pGMM, idChunk, pTlbe);
1837 return pChunk;
1838}
1839
1840
1841/**
1842 * Finds an allocation chunk.
1843 *
1844 * This is not expected to fail and will bitch if it does.
1845 *
1846 * @returns Pointer to the allocation chunk, NULL if not found.
1847 * @param pGMM Pointer to the GMM instance.
1848 * @param idChunk The ID of the chunk to find.
1849 */
1850DECLINLINE(PGMMCHUNK) gmmR0GetChunk(PGMM pGMM, uint32_t idChunk)
1851{
1852 RTSpinlockAcquire(pGMM->hSpinLockTree);
1853 PGMMCHUNK pChunk = gmmR0GetChunkLocked(pGMM, idChunk);
1854 RTSpinlockRelease(pGMM->hSpinLockTree);
1855 return pChunk;
1856}
1857
1858
1859/**
1860 * Finds a page.
1861 *
1862 * This is not expected to fail and will bitch if it does.
1863 *
1864 * @returns Pointer to the page, NULL if not found.
1865 * @param pGMM Pointer to the GMM instance.
1866 * @param idPage The ID of the page to find.
1867 */
1868DECLINLINE(PGMMPAGE) gmmR0GetPage(PGMM pGMM, uint32_t idPage)
1869{
1870 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
1871 if (RT_LIKELY(pChunk))
1872 return &pChunk->aPages[idPage & GMM_PAGEID_IDX_MASK];
1873 return NULL;
1874}
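
/*
 * Sketch of the page ID decomposition performed by gmmR0GetPage above; this
 * only restates what the function does and assumes idPage refers to a page
 * that actually exists:
 *
 * @code
 *      uint32_t  const idChunk = idPage >> GMM_CHUNKID_SHIFT;   // chunk ID, the AVL tree key
 *      uint32_t  const iPage   = idPage &  GMM_PAGEID_IDX_MASK; // index into pChunk->aPages[]
 *      PGMMCHUNK const pChunk  = gmmR0GetChunk(pGMM, idChunk);
 *      PGMMPAGE  const pPage   = pChunk ? &pChunk->aPages[iPage] : NULL;
 * @endcode
 */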
1875
1876
1877#if 0 /* unused */
1878/**
1879 * Gets the host physical address for a page given by its ID.
1880 *
1881 * @returns The host physical address or NIL_RTHCPHYS.
1882 * @param pGMM Pointer to the GMM instance.
1883 * @param idPage The ID of the page to find.
1884 */
1885DECLINLINE(RTHCPHYS) gmmR0GetPageHCPhys(PGMM pGMM, uint32_t idPage)
1886{
1887 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
1888 if (RT_LIKELY(pChunk))
1889 return RTR0MemObjGetPagePhysAddr(pChunk->hMemObj, idPage & GMM_PAGEID_IDX_MASK);
1890 return NIL_RTHCPHYS;
1891}
1892#endif /* unused */
1893
1894
1895/**
1896 * Selects the appropriate free list given the number of free pages.
1897 *
1898 * @returns Free list index.
1899 * @param cFree The number of free pages in the chunk.
1900 */
1901DECLINLINE(unsigned) gmmR0SelectFreeSetList(unsigned cFree)
1902{
1903 unsigned iList = cFree >> GMM_CHUNK_FREE_SET_SHIFT;
1904 AssertMsg(iList < RT_SIZEOFMEMB(GMMCHUNKFREESET, apLists) / RT_SIZEOFMEMB(GMMCHUNKFREESET, apLists[0]),
1905 ("%d (%u)\n", iList, cFree));
1906 return iList;
1907}
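
/*
 * Worked example for the bucketing above, assuming the usual 2 MB chunks and
 * 4 KB guest pages (so GMM_CHUNK_NUM_PAGES is 512) and, purely for the sake of
 * illustration, a GMM_CHUNK_FREE_SET_SHIFT of 4: chunks with 0..15 free pages
 * would land in list 0, 16..31 in list 1, and so on, with completely free
 * chunks ending up in the last (GMM_CHUNK_FREE_SET_UNUSED_LIST) list. The real
 * shift value is defined elsewhere and is not taken from this file.
 */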
1908
1909
1910/**
1911 * Unlinks the chunk from the free list it's currently on (if any).
1912 *
1913 * @param pChunk The allocation chunk.
1914 */
1915DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk)
1916{
1917 PGMMCHUNKFREESET pSet = pChunk->pSet;
1918 if (RT_LIKELY(pSet))
1919 {
1920 pSet->cFreePages -= pChunk->cFree;
1921 pSet->idGeneration++;
1922
1923 PGMMCHUNK pPrev = pChunk->pFreePrev;
1924 PGMMCHUNK pNext = pChunk->pFreeNext;
1925 if (pPrev)
1926 pPrev->pFreeNext = pNext;
1927 else
1928 pSet->apLists[gmmR0SelectFreeSetList(pChunk->cFree)] = pNext;
1929 if (pNext)
1930 pNext->pFreePrev = pPrev;
1931
1932 pChunk->pSet = NULL;
1933 pChunk->pFreeNext = NULL;
1934 pChunk->pFreePrev = NULL;
1935 }
1936 else
1937 {
1938 Assert(!pChunk->pFreeNext);
1939 Assert(!pChunk->pFreePrev);
1940 Assert(!pChunk->cFree);
1941 }
1942}
1943
1944
1945/**
1946 * Links the chunk onto the appropriate free list in the specified free set.
1947 *
1948 * If no free entries, it's not linked into any list.
1949 *
1950 * @param pChunk The allocation chunk.
1951 * @param pSet The free set.
1952 */
1953DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet)
1954{
1955 Assert(!pChunk->pSet);
1956 Assert(!pChunk->pFreeNext);
1957 Assert(!pChunk->pFreePrev);
1958
1959 if (pChunk->cFree > 0)
1960 {
1961 pChunk->pSet = pSet;
1962 pChunk->pFreePrev = NULL;
1963 unsigned const iList = gmmR0SelectFreeSetList(pChunk->cFree);
1964 pChunk->pFreeNext = pSet->apLists[iList];
1965 if (pChunk->pFreeNext)
1966 pChunk->pFreeNext->pFreePrev = pChunk;
1967 pSet->apLists[iList] = pChunk;
1968
1969 pSet->cFreePages += pChunk->cFree;
1970 pSet->idGeneration++;
1971 }
1972}
1973
1974
1975/**
1976 * Selects the appropriate free set for the chunk and links it onto the free list there.
1977 *
1978 * If no free entries, it's not linked into any list.
1979 *
1980 * @param pGMM Pointer to the GMM instance.
1981 * @param   pGVM        Pointer to the kernel-only VM instance data.
1982 * @param pChunk The allocation chunk.
1983 */
1984DECLINLINE(void) gmmR0SelectSetAndLinkChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
1985{
1986 PGMMCHUNKFREESET pSet;
1987 if (pGMM->fBoundMemoryMode)
1988 pSet = &pGVM->gmm.s.Private;
1989 else if (pChunk->cShared)
1990 pSet = &pGMM->Shared;
1991 else
1992 pSet = &pGMM->PrivateX;
1993 gmmR0LinkChunk(pChunk, pSet);
1994}
1995
1996
1997/**
1998 * Frees a Chunk ID.
1999 *
2000 * @param pGMM Pointer to the GMM instance.
2001 * @param idChunk The Chunk ID to free.
2002 */
2003static void gmmR0FreeChunkId(PGMM pGMM, uint32_t idChunk)
2004{
2005 AssertReturnVoid(idChunk != NIL_GMM_CHUNKID);
2006 RTSpinlockAcquire(pGMM->hSpinLockChunkId); /* We could probably skip the locking here, I think. */
2007
2008 AssertMsg(ASMBitTest(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk));
2009 ASMAtomicBitClear(&pGMM->bmChunkId[0], idChunk);
2010
2011 RTSpinlockRelease(pGMM->hSpinLockChunkId);
2012}
2013
2014
2015/**
2016 * Allocates a new Chunk ID.
2017 *
2018 * @returns The Chunk ID.
2019 * @param pGMM Pointer to the GMM instance.
2020 */
2021static uint32_t gmmR0AllocateChunkId(PGMM pGMM)
2022{
2023 AssertCompile(!((GMM_CHUNKID_LAST + 1) & 31)); /* must be a multiple of 32 */
2024 AssertCompile(NIL_GMM_CHUNKID == 0);
2025
2026 RTSpinlockAcquire(pGMM->hSpinLockChunkId);
2027
2028 /*
2029 * Try the next sequential one.
2030 */
2031 int32_t idChunk = ++pGMM->idChunkPrev;
2032 if ( (uint32_t)idChunk <= GMM_CHUNKID_LAST
2033 && idChunk > NIL_GMM_CHUNKID)
2034 {
2035 if (!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk))
2036 {
2037 RTSpinlockRelease(pGMM->hSpinLockChunkId);
2038 return idChunk;
2039 }
2040
2041 /*
2042 * Scan sequentially from the last one.
2043 */
2044 if ((uint32_t)idChunk < GMM_CHUNKID_LAST)
2045 {
2046 idChunk = ASMBitNextClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1, idChunk);
2047 if ( idChunk > NIL_GMM_CHUNKID
2048 && (uint32_t)idChunk <= GMM_CHUNKID_LAST)
2049 {
2050 AssertMsgReturnStmt(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk),
2051 RTSpinlockRelease(pGMM->hSpinLockChunkId), NIL_GMM_CHUNKID);
2052
2053 pGMM->idChunkPrev = idChunk;
2054 RTSpinlockRelease(pGMM->hSpinLockChunkId);
2055 return idChunk;
2056 }
2057 }
2058 }
2059
2060 /*
2061 * Ok, scan from the start.
2062 * We're not racing anyone, so there is no need to expect failures or have restart loops.
2063 */
2064 idChunk = ASMBitFirstClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1);
2065 AssertMsgReturnStmt(idChunk > NIL_GMM_CHUNKID && (uint32_t)idChunk <= GMM_CHUNKID_LAST, ("%#x\n", idChunk),
2066                          RTSpinlockRelease(pGMM->hSpinLockChunkId), NIL_GMM_CHUNKID);
2067 AssertMsgReturnStmt(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk),
2068 RTSpinlockRelease(pGMM->hSpinLockChunkId), NIL_GMM_CHUNKID);
2069
2070 pGMM->idChunkPrev = idChunk;
2071 RTSpinlockRelease(pGMM->hSpinLockChunkId);
2072 return idChunk;
2073}
2074
2075
2076/**
2077 * Allocates one private page.
2078 *
2079 * Worker for gmmR0AllocatePages.
2080 *
2081 * @param pChunk The chunk to allocate it from.
2082 * @param hGVM The GVM handle of the VM requesting memory.
2083 * @param pPageDesc The page descriptor.
2084 */
2085static void gmmR0AllocatePage(PGMMCHUNK pChunk, uint32_t hGVM, PGMMPAGEDESC pPageDesc)
2086{
2087 /* update the chunk stats. */
2088 if (pChunk->hGVM == NIL_GVM_HANDLE)
2089 pChunk->hGVM = hGVM;
2090 Assert(pChunk->cFree);
2091 pChunk->cFree--;
2092 pChunk->cPrivate++;
2093
2094 /* unlink the first free page. */
2095 const uint32_t iPage = pChunk->iFreeHead;
2096 AssertReleaseMsg(iPage < RT_ELEMENTS(pChunk->aPages), ("%d\n", iPage));
2097 PGMMPAGE pPage = &pChunk->aPages[iPage];
2098 Assert(GMM_PAGE_IS_FREE(pPage));
2099 pChunk->iFreeHead = pPage->Free.iNext;
2100 Log3(("A pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x iNext=%#x\n",
2101 pPage, iPage, (pChunk->Core.Key << GMM_CHUNKID_SHIFT) | iPage,
2102 pPage->Common.u2State, pChunk->iFreeHead, pPage->Free.iNext));
2103
2104 bool const fZeroed = pPage->Free.fZeroed;
2105
2106 /* make the page private. */
2107 pPage->u = 0;
2108 AssertCompile(GMM_PAGE_STATE_PRIVATE == 0);
2109 pPage->Private.hGVM = hGVM;
2110 AssertCompile(NIL_RTHCPHYS >= GMM_GCPHYS_LAST);
2111 AssertCompile(GMM_GCPHYS_UNSHAREABLE >= GMM_GCPHYS_LAST);
2112 if (pPageDesc->HCPhysGCPhys <= GMM_GCPHYS_LAST)
2113 pPage->Private.pfn = pPageDesc->HCPhysGCPhys >> GUEST_PAGE_SHIFT;
2114 else
2115 pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE; /* unshareable / unassigned - same thing. */
2116
2117 /* update the page descriptor. */
2118 pPageDesc->idSharedPage = NIL_GMM_PAGEID;
2119 pPageDesc->idPage = (pChunk->Core.Key << GMM_CHUNKID_SHIFT) | iPage;
2120 RTHCPHYS const HCPhys = RTR0MemObjGetPagePhysAddr(pChunk->hMemObj, iPage);
2121 Assert(HCPhys != NIL_RTHCPHYS); Assert(HCPhys < NIL_GMMPAGEDESC_PHYS);
2122 pPageDesc->HCPhysGCPhys = HCPhys;
2123 pPageDesc->fZeroed = fZeroed;
2124}
2125
2126
2127/**
2128 * Picks the free pages from a chunk.
2129 *
2130 * @returns The new page descriptor table index.
2131 * @param pChunk The chunk.
2132 * @param hGVM The affinity of the chunk. NIL_GVM_HANDLE for no
2133 * affinity.
2134 * @param iPage The current page descriptor table index.
2135 * @param cPages The total number of pages to allocate.
2136 * @param   paPages     The page descriptor table (input + output).
2137 */
2138static uint32_t gmmR0AllocatePagesFromChunk(PGMMCHUNK pChunk, uint16_t const hGVM, uint32_t iPage, uint32_t cPages,
2139 PGMMPAGEDESC paPages)
2140{
2141 PGMMCHUNKFREESET pSet = pChunk->pSet; Assert(pSet);
2142 gmmR0UnlinkChunk(pChunk);
2143
2144 for (; pChunk->cFree && iPage < cPages; iPage++)
2145 gmmR0AllocatePage(pChunk, hGVM, &paPages[iPage]);
2146
2147 gmmR0LinkChunk(pChunk, pSet);
2148 return iPage;
2149}
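
/*
 * Typical caller pattern for gmmR0AllocatePagesFromChunk (a sketch mirroring
 * the scan loops further down in this file, with pGVM, pSet, iPage, cPages and
 * paPages as in gmmR0AllocatePagesNew): since the call unlinks and relinks the
 * chunk, the next pointer must be fetched before the call.
 *
 * @code
 *      for (unsigned iList = 0; iList < RT_ELEMENTS(pSet->apLists); iList++)
 *          for (PGMMCHUNK pChunk = pSet->apLists[iList], pNext = NULL; pChunk; pChunk = pNext)
 *          {
 *              pNext = pChunk->pFreeNext;      // grab it before the chunk gets relinked
 *              if (pChunk->hGVM == pGVM->hSelf)
 *              {
 *                  iPage = gmmR0AllocatePagesFromChunk(pChunk, pGVM->hSelf, iPage, cPages, paPages);
 *                  if (iPage >= cPages)
 *                      return iPage;
 *              }
 *          }
 * @endcode
 */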
2150
2151
2152/**
2153 * Registers a new chunk of memory.
2154 *
2155 * This is called by gmmR0AllocateChunkNew and GMMR0AllocateLargePage.
2156 *
2157 * In the GMMR0AllocateLargePage case the GMM_CHUNK_FLAGS_LARGE_PAGE flag is
2158 * set and the chunk will be registered as fully allocated to save time.
2159 *
2160 * @returns VBox status code. On success, the giant GMM lock will be held, the
2161 * caller must release it (ugly).
2162 * @param pGMM Pointer to the GMM instance.
2163 * @param pSet Pointer to the set.
2164 * @param hMemObj The memory object for the chunk.
2165 * @param hGVM The affinity of the chunk. NIL_GVM_HANDLE for no
2166 * affinity.
2167 * @param pSession Same as @a hGVM.
2168 * @param fChunkFlags The chunk flags, GMM_CHUNK_FLAGS_XXX.
2169 * @param cPages The number of pages requested. Zero for large pages.
2170 * @param paPages The page descriptor table (input + output). NULL for
2171 * large pages.
2172 * @param piPage The pointer to the page descriptor table index variable.
2173 * This will be updated. NULL for large pages.
2174 * @param ppChunk Chunk address (out).
2175 *
2176 * @remarks The caller must not own the giant GMM mutex.
2177 * The giant GMM mutex will be acquired and returned acquired in
2178 * the success path. On failure, no locks will be held.
2179 */
2180static int gmmR0RegisterChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, RTR0MEMOBJ hMemObj, uint16_t hGVM, PSUPDRVSESSION pSession,
2181 uint16_t fChunkFlags, uint32_t cPages, PGMMPAGEDESC paPages, uint32_t *piPage, PGMMCHUNK *ppChunk)
2182{
2183 /*
2184 * Validate input & state.
2185 */
2186 Assert(pGMM->hMtxOwner != RTThreadNativeSelf());
2187 Assert(hGVM != NIL_GVM_HANDLE || pGMM->fBoundMemoryMode);
2188 Assert(fChunkFlags == 0 || fChunkFlags == GMM_CHUNK_FLAGS_LARGE_PAGE);
2189 if (!(fChunkFlags &= GMM_CHUNK_FLAGS_LARGE_PAGE))
2190 {
2191 AssertPtr(paPages);
2192 AssertPtr(piPage);
2193 Assert(cPages > 0);
2194 Assert(cPages > *piPage);
2195 }
2196 else
2197 {
2198 Assert(cPages == 0);
2199 Assert(!paPages);
2200 Assert(!piPage);
2201 }
2202
2203#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
2204 /*
2205 * Get a ring-0 mapping of the object.
2206 */
2207 uint8_t *pbMapping = (uint8_t *)RTR0MemObjAddress(hMemObj);
2208 if (!pbMapping)
2209 {
2210 RTR0MEMOBJ hMapObj;
2211 int rc = RTR0MemObjMapKernel(&hMapObj, hMemObj, (void *)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE);
2212 if (RT_SUCCESS(rc))
2213 pbMapping = (uint8_t *)RTR0MemObjAddress(hMapObj);
2214 else
2215 return rc;
2216 AssertPtr(pbMapping);
2217 }
2218#endif
2219
2220 /*
2221 * Allocate a chunk and an ID for it.
2222 */
2223 int rc;
2224 PGMMCHUNK pChunk = (PGMMCHUNK)RTMemAllocZ(sizeof(*pChunk));
2225 if (pChunk)
2226 {
2227 pChunk->Core.Key = gmmR0AllocateChunkId(pGMM);
2228 if ( pChunk->Core.Key != NIL_GMM_CHUNKID
2229 && pChunk->Core.Key <= GMM_CHUNKID_LAST)
2230 {
2231 /*
2232 * Initialize it.
2233 */
2234 pChunk->hMemObj = hMemObj;
2235#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
2236 pChunk->pbMapping = pbMapping;
2237#endif
2238 pChunk->hGVM = hGVM;
2239 pChunk->idNumaNode = gmmR0GetCurrentNumaNodeId();
2240 pChunk->iChunkMtx = UINT8_MAX;
2241 pChunk->fFlags = fChunkFlags;
2242 pChunk->uidOwner = pSession ? SUPR0GetSessionUid(pSession) : NIL_RTUID;
2243 /*pChunk->cShared = 0; */
2244
2245 uint32_t const iDstPageFirst = piPage ? *piPage : cPages;
2246 if (!(fChunkFlags & GMM_CHUNK_FLAGS_LARGE_PAGE))
2247 {
2248 /*
2249 * Allocate the requested number of pages from the start of the chunk,
2250 * queue the rest (if any) on the free list.
2251 */
2252 uint32_t const cPagesAlloc = RT_MIN(cPages - iDstPageFirst, GMM_CHUNK_NUM_PAGES);
2253 pChunk->cPrivate = cPagesAlloc;
2254 pChunk->cFree = GMM_CHUNK_NUM_PAGES - cPagesAlloc;
2255 pChunk->iFreeHead = GMM_CHUNK_NUM_PAGES > cPagesAlloc ? cPagesAlloc : UINT16_MAX;
2256
2257 /* Alloc pages: */
2258 uint32_t const idPageChunk = pChunk->Core.Key << GMM_CHUNKID_SHIFT;
2259 uint32_t iDstPage = iDstPageFirst;
2260 uint32_t iPage;
2261 for (iPage = 0; iPage < cPagesAlloc; iPage++, iDstPage++)
2262 {
2263 if (paPages[iDstPage].HCPhysGCPhys <= GMM_GCPHYS_LAST)
2264 pChunk->aPages[iPage].Private.pfn = paPages[iDstPage].HCPhysGCPhys >> GUEST_PAGE_SHIFT;
2265 else
2266 pChunk->aPages[iPage].Private.pfn = GMM_PAGE_PFN_UNSHAREABLE; /* unshareable / unassigned - same thing. */
2267 pChunk->aPages[iPage].Private.hGVM = hGVM;
2268 pChunk->aPages[iPage].Private.u2State = GMM_PAGE_STATE_PRIVATE;
2269
2270 paPages[iDstPage].HCPhysGCPhys = RTR0MemObjGetPagePhysAddr(hMemObj, iPage);
2271 paPages[iDstPage].fZeroed = true;
2272 paPages[iDstPage].idPage = idPageChunk | iPage;
2273 paPages[iDstPage].idSharedPage = NIL_GMM_PAGEID;
2274 }
2275 *piPage = iDstPage;
2276
2277 /* Build free list: */
2278 if (iPage < RT_ELEMENTS(pChunk->aPages))
2279 {
2280 Assert(pChunk->iFreeHead == iPage);
2281 for (; iPage < RT_ELEMENTS(pChunk->aPages) - 1; iPage++)
2282 {
2283 pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
2284 pChunk->aPages[iPage].Free.fZeroed = true;
2285 pChunk->aPages[iPage].Free.iNext = iPage + 1;
2286 }
2287 pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.u2State = GMM_PAGE_STATE_FREE;
2288 pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.fZeroed = true;
2289 pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.iNext = UINT16_MAX;
2290 }
2291 else
2292 Assert(pChunk->iFreeHead == UINT16_MAX);
2293 }
2294 else
2295 {
2296 /*
2297 * Large page: Mark all pages as privately allocated (watered down gmmR0AllocatePage).
2298 */
2299 pChunk->cFree = 0;
2300 pChunk->cPrivate = GMM_CHUNK_NUM_PAGES;
2301 pChunk->iFreeHead = UINT16_MAX;
2302
2303 for (unsigned iPage = 0; iPage < RT_ELEMENTS(pChunk->aPages); iPage++)
2304 {
2305 pChunk->aPages[iPage].Private.pfn = GMM_PAGE_PFN_UNSHAREABLE;
2306 pChunk->aPages[iPage].Private.hGVM = hGVM;
2307 pChunk->aPages[iPage].Private.u2State = GMM_PAGE_STATE_PRIVATE;
2308 }
2309 }
2310
2311 /*
2312 * Zero the memory if it wasn't zeroed by the host already.
2313 * This simplifies keeping secret kernel bits from userland and brings
2314 * everyone to the same level wrt allocation zeroing.
2315 */
2316 rc = VINF_SUCCESS;
2317 if (!RTR0MemObjWasZeroInitialized(hMemObj))
2318 {
2319#ifdef VBOX_WITH_LINEAR_HOST_PHYS_MEM
2320 if (!(fChunkFlags & GMM_CHUNK_FLAGS_LARGE_PAGE))
2321 {
2322 for (uint32_t iPage = 0; iPage < GMM_CHUNK_SIZE / HOST_PAGE_SIZE; iPage++)
2323 {
2324 void *pvPage = NULL;
2325 rc = SUPR0HCPhysToVirt(RTR0MemObjGetPagePhysAddr(hMemObj, iPage), &pvPage);
2326 AssertRCBreak(rc);
2327 RT_BZERO(pvPage, HOST_PAGE_SIZE);
2328 }
2329 }
2330 else
2331 {
2332 /* Can do the whole large page in one go. */
2333 void *pvPage = NULL;
2334 rc = SUPR0HCPhysToVirt(RTR0MemObjGetPagePhysAddr(hMemObj, 0), &pvPage);
2335 AssertRC(rc);
2336 if (RT_SUCCESS(rc))
2337 RT_BZERO(pvPage, GMM_CHUNK_SIZE);
2338 }
2339#else
2340 RT_BZERO(pbMapping, GMM_CHUNK_SIZE);
2341#endif
2342 }
2343 if (RT_SUCCESS(rc))
2344 {
2345 *ppChunk = pChunk;
2346
2347 /*
2348 * Allocate a Chunk ID and insert it into the tree.
2349 * This has to be done behind the mutex of course.
2350 */
2351 rc = gmmR0MutexAcquire(pGMM);
2352 if (RT_SUCCESS(rc))
2353 {
2354 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2355 {
2356 RTSpinlockAcquire(pGMM->hSpinLockTree);
2357 if (RTAvlU32Insert(&pGMM->pChunks, &pChunk->Core))
2358 {
2359 pGMM->cChunks++;
2360 RTListAppend(&pGMM->ChunkList, &pChunk->ListNode);
2361 RTSpinlockRelease(pGMM->hSpinLockTree);
2362
2363 gmmR0LinkChunk(pChunk, pSet);
2364
2365 LogFlow(("gmmR0RegisterChunk: pChunk=%p id=%#x cChunks=%d\n", pChunk, pChunk->Core.Key, pGMM->cChunks));
2366 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2367 return VINF_SUCCESS;
2368 }
2369
2370 /*
2371 * Bail out.
2372 */
2373 RTSpinlockRelease(pGMM->hSpinLockTree);
2374 rc = VERR_GMM_CHUNK_INSERT;
2375 }
2376 else
2377 rc = VERR_GMM_IS_NOT_SANE;
2378 gmmR0MutexRelease(pGMM);
2379 }
2380 *ppChunk = NULL;
2381 }
2382
2383 /* Undo any page allocations. */
2384 if (!(fChunkFlags & GMM_CHUNK_FLAGS_LARGE_PAGE))
2385 {
2386 uint32_t const cToFree = pChunk->cPrivate;
2387 Assert(*piPage - iDstPageFirst == cToFree);
2388 for (uint32_t iDstPage = iDstPageFirst, iPage = 0; iPage < cToFree; iPage++, iDstPage++)
2389 {
2390                    paPages[iDstPage].fZeroed = false;
2391                    if (pChunk->aPages[iPage].Private.pfn == GMM_PAGE_PFN_UNSHAREABLE)
2392                        paPages[iDstPage].HCPhysGCPhys = NIL_GMMPAGEDESC_PHYS;
2393                    else
2394                        paPages[iDstPage].HCPhysGCPhys = (RTHCPHYS)pChunk->aPages[iPage].Private.pfn << GUEST_PAGE_SHIFT;
2395                    paPages[iDstPage].idPage       = NIL_GMM_PAGEID;
2396                    paPages[iDstPage].idSharedPage = NIL_GMM_PAGEID;
2397 }
2398 *piPage = iDstPageFirst;
2399 }
2400
2401 gmmR0FreeChunkId(pGMM, pChunk->Core.Key);
2402 }
2403 else
2404 rc = VERR_GMM_CHUNK_INSERT;
2405 RTMemFree(pChunk);
2406 }
2407 else
2408 rc = VERR_NO_MEMORY;
2409 return rc;
2410}
2411
2412
2413/**
2414 * Allocates a new chunk, immediately picks the requested pages from it, and
2415 * adds what's remaining to the specified free set.
2416 *
2417 * @note This will leave the giant mutex while allocating the new chunk!
2418 *
2419 * @returns VBox status code.
2420 * @param pGMM Pointer to the GMM instance data.
2421 * @param   pGVM        Pointer to the kernel-only VM instance data.
2422 * @param pSet Pointer to the free set.
2423 * @param cPages The number of pages requested.
2424 * @param paPages The page descriptor table (input + output).
2425 * @param piPage The pointer to the page descriptor table index variable.
2426 * This will be updated.
2427 */
2428static int gmmR0AllocateChunkNew(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet, uint32_t cPages,
2429 PGMMPAGEDESC paPages, uint32_t *piPage)
2430{
2431 gmmR0MutexRelease(pGMM);
2432
2433 RTR0MEMOBJ hMemObj;
2434 int rc;
2435#ifdef VBOX_WITH_LINEAR_HOST_PHYS_MEM
2436 if (pGMM->fHasWorkingAllocPhysNC)
2437 rc = RTR0MemObjAllocPhysNC(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
2438 else
2439#endif
2440 rc = RTR0MemObjAllocPage(&hMemObj, GMM_CHUNK_SIZE, false /*fExecutable*/);
2441 if (RT_SUCCESS(rc))
2442 {
2443 PGMMCHUNK pIgnored;
2444 rc = gmmR0RegisterChunk(pGMM, pSet, hMemObj, pGVM->hSelf, pGVM->pSession, 0 /*fChunkFlags*/,
2445 cPages, paPages, piPage, &pIgnored);
2446 if (RT_SUCCESS(rc))
2447 return VINF_SUCCESS;
2448
2449 /* bail out */
2450 RTR0MemObjFree(hMemObj, true /* fFreeMappings */);
2451 }
2452
2453 int rc2 = gmmR0MutexAcquire(pGMM);
2454 AssertRCReturn(rc2, RT_FAILURE(rc) ? rc : rc2);
2455 return rc;
2456
2457}
2458
2459
2460/**
2461 * As a last resort we'll pick any page we can get.
2462 *
2463 * @returns The new page descriptor table index.
2464 * @param pSet The set to pick from.
2465 * @param pGVM Pointer to the global VM structure.
2466 * @param uidSelf The UID of the caller.
2467 * @param iPage The current page descriptor table index.
2468 * @param cPages The total number of pages to allocate.
2469 * @param   paPages     The page descriptor table (input + output).
2470 */
2471static uint32_t gmmR0AllocatePagesIndiscriminately(PGMMCHUNKFREESET pSet, PGVM pGVM, RTUID uidSelf,
2472 uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
2473{
2474 unsigned iList = RT_ELEMENTS(pSet->apLists);
2475 while (iList-- > 0)
2476 {
2477 PGMMCHUNK pChunk = pSet->apLists[iList];
2478 while (pChunk)
2479 {
2480 PGMMCHUNK pNext = pChunk->pFreeNext;
2481 if ( pChunk->uidOwner == uidSelf
2482 || ( pChunk->cMappingsX == 0
2483 && pChunk->cFree == (GMM_CHUNK_SIZE >> GUEST_PAGE_SHIFT)))
2484 {
2485 iPage = gmmR0AllocatePagesFromChunk(pChunk, pGVM->hSelf, iPage, cPages, paPages);
2486 if (iPage >= cPages)
2487 return iPage;
2488 }
2489
2490 pChunk = pNext;
2491 }
2492 }
2493 return iPage;
2494}
2495
2496
2497/**
2498 * Pick pages from empty chunks on the same NUMA node.
2499 *
2500 * @returns The new page descriptor table index.
2501 * @param pSet The set to pick from.
2502 * @param pGVM Pointer to the global VM structure.
2503 * @param uidSelf The UID of the caller.
2504 * @param iPage The current page descriptor table index.
2505 * @param cPages The total number of pages to allocate.
2506 * @param   paPages     The page descriptor table (input + output).
2507 */
2508static uint32_t gmmR0AllocatePagesFromEmptyChunksOnSameNode(PGMMCHUNKFREESET pSet, PGVM pGVM, RTUID uidSelf,
2509 uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
2510{
2511 PGMMCHUNK pChunk = pSet->apLists[GMM_CHUNK_FREE_SET_UNUSED_LIST];
2512 if (pChunk)
2513 {
2514 uint16_t const idNumaNode = gmmR0GetCurrentNumaNodeId();
2515 while (pChunk)
2516 {
2517 PGMMCHUNK pNext = pChunk->pFreeNext;
2518
2519 if ( pChunk->idNumaNode == idNumaNode
2520 && ( pChunk->uidOwner == uidSelf
2521 || pChunk->cMappingsX == 0))
2522 {
2523 pChunk->hGVM = pGVM->hSelf;
2524 pChunk->uidOwner = uidSelf;
2525 iPage = gmmR0AllocatePagesFromChunk(pChunk, pGVM->hSelf, iPage, cPages, paPages);
2526 if (iPage >= cPages)
2527 {
2528 pGVM->gmm.s.idLastChunkHint = pChunk->cFree ? pChunk->Core.Key : NIL_GMM_CHUNKID;
2529 return iPage;
2530 }
2531 }
2532
2533 pChunk = pNext;
2534 }
2535 }
2536 return iPage;
2537}
2538
2539
2540/**
2541 * Pick pages from non-empty chunks on the same NUMA node.
2542 *
2543 * @returns The new page descriptor table index.
2544 * @param pSet The set to pick from.
2545 * @param pGVM Pointer to the global VM structure.
2546 * @param uidSelf The UID of the caller.
2547 * @param iPage The current page descriptor table index.
2548 * @param cPages The total number of pages to allocate.
2549 * @param   paPages     The page descriptor table (input + output).
2550 */
2551static uint32_t gmmR0AllocatePagesFromSameNode(PGMMCHUNKFREESET pSet, PGVM pGVM, RTUID const uidSelf,
2552 uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
2553{
2554 /** @todo start by picking from chunks with about the right size first? */
2555 uint16_t const idNumaNode = gmmR0GetCurrentNumaNodeId();
2556 unsigned iList = GMM_CHUNK_FREE_SET_UNUSED_LIST;
2557 while (iList-- > 0)
2558 {
2559 PGMMCHUNK pChunk = pSet->apLists[iList];
2560 while (pChunk)
2561 {
2562 PGMMCHUNK pNext = pChunk->pFreeNext;
2563
2564 if ( pChunk->idNumaNode == idNumaNode
2565 && pChunk->uidOwner == uidSelf)
2566 {
2567 iPage = gmmR0AllocatePagesFromChunk(pChunk, pGVM->hSelf, iPage, cPages, paPages);
2568 if (iPage >= cPages)
2569 {
2570 pGVM->gmm.s.idLastChunkHint = pChunk->cFree ? pChunk->Core.Key : NIL_GMM_CHUNKID;
2571 return iPage;
2572 }
2573 }
2574
2575 pChunk = pNext;
2576 }
2577 }
2578 return iPage;
2579}
2580
2581
2582/**
2583 * Pick pages that are in chunks already associated with the VM.
2584 *
2585 * @returns The new page descriptor table index.
2586 * @param pGMM Pointer to the GMM instance data.
2587 * @param pGVM Pointer to the global VM structure.
2588 * @param pSet The set to pick from.
2589 * @param iPage The current page descriptor table index.
2590 * @param cPages The total number of pages to allocate.
2591 * @param   paPages     The page descriptor table (input + output).
2592 */
2593static uint32_t gmmR0AllocatePagesAssociatedWithVM(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet,
2594 uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
2595{
2596 uint16_t const hGVM = pGVM->hSelf;
2597
2598 /* Hint. */
2599 if (pGVM->gmm.s.idLastChunkHint != NIL_GMM_CHUNKID)
2600 {
2601 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, pGVM->gmm.s.idLastChunkHint);
2602 if (pChunk && pChunk->cFree)
2603 {
2604 iPage = gmmR0AllocatePagesFromChunk(pChunk, hGVM, iPage, cPages, paPages);
2605 if (iPage >= cPages)
2606 return iPage;
2607 }
2608 }
2609
2610 /* Scan. */
2611 for (unsigned iList = 0; iList < RT_ELEMENTS(pSet->apLists); iList++)
2612 {
2613 PGMMCHUNK pChunk = pSet->apLists[iList];
2614 while (pChunk)
2615 {
2616 PGMMCHUNK pNext = pChunk->pFreeNext;
2617
2618 if (pChunk->hGVM == hGVM)
2619 {
2620 iPage = gmmR0AllocatePagesFromChunk(pChunk, hGVM, iPage, cPages, paPages);
2621 if (iPage >= cPages)
2622 {
2623 pGVM->gmm.s.idLastChunkHint = pChunk->cFree ? pChunk->Core.Key : NIL_GMM_CHUNKID;
2624 return iPage;
2625 }
2626 }
2627
2628 pChunk = pNext;
2629 }
2630 }
2631 return iPage;
2632}
2633
2634
2635
2636/**
2637 * Pick pages in bound memory mode.
2638 *
2639 * @returns The new page descriptor table index.
2640 * @param pGVM Pointer to the global VM structure.
2641 * @param iPage The current page descriptor table index.
2642 * @param cPages The total number of pages to allocate.
2643 * @param   paPages     The page descriptor table (input + output).
2644 */
2645static uint32_t gmmR0AllocatePagesInBoundMode(PGVM pGVM, uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
2646{
2647 for (unsigned iList = 0; iList < RT_ELEMENTS(pGVM->gmm.s.Private.apLists); iList++)
2648 {
2649 PGMMCHUNK pChunk = pGVM->gmm.s.Private.apLists[iList];
2650 while (pChunk)
2651 {
2652 Assert(pChunk->hGVM == pGVM->hSelf);
2653 PGMMCHUNK pNext = pChunk->pFreeNext;
2654 iPage = gmmR0AllocatePagesFromChunk(pChunk, pGVM->hSelf, iPage, cPages, paPages);
2655 if (iPage >= cPages)
2656 return iPage;
2657 pChunk = pNext;
2658 }
2659 }
2660 return iPage;
2661}
2662
2663
2664/**
2665 * Checks if we should start picking pages from chunks of other VMs because
2666 * we're getting close to the system memory or reserved limit.
2667 *
2668 * @returns @c true if we should, @c false if we should first try allocate more
2669 * chunks.
2670 */
2671static bool gmmR0ShouldAllocatePagesInOtherChunksBecauseOfLimits(PGVM pGVM)
2672{
2673 /*
2674     * Don't allocate a new chunk if we're getting close to the reservation limit.
2675 */
2676 uint64_t cPgReserved = pGVM->gmm.s.Stats.Reserved.cBasePages
2677 + pGVM->gmm.s.Stats.Reserved.cFixedPages
2678 - pGVM->gmm.s.Stats.cBalloonedPages
2679 /** @todo what about shared pages? */;
2680 uint64_t cPgAllocated = pGVM->gmm.s.Stats.Allocated.cBasePages
2681 + pGVM->gmm.s.Stats.Allocated.cFixedPages;
2682 uint64_t cPgDelta = cPgReserved - cPgAllocated;
2683 if (cPgDelta < GMM_CHUNK_NUM_PAGES * 4)
2684 return true;
2685 /** @todo make the threshold configurable, also test the code to see if
2686 * this ever kicks in (we might be reserving too much or smth). */
2687
2688 /*
2689     * Check how close we are to the max memory limit and how many fragments
2690 * there are?...
2691 */
2692 /** @todo */
2693
2694 return false;
2695}
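
/*
 * Worked example for the threshold above, assuming the usual 2 MB chunks and
 * 4 KB guest pages so that GMM_CHUNK_NUM_PAGES is 512: a VM with a 1 GiB base
 * reservation (262144 pages) that has already allocated all but 2000 of them
 * gets cPgDelta = 2000, which is below 4 * 512 = 2048, so the caller will
 * prefer picking pages from existing chunks over allocating a fresh chunk it
 * could only partially use.
 */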
2696
2697
2698/**
2699 * Checks if we should start picking pages from chunks of other VMs because
2700 * there is a lot of free pages around.
2701 *
2702 * @returns @c true if we should, @c false if we should first try allocate more
2703 * chunks.
2704 */
2705static bool gmmR0ShouldAllocatePagesInOtherChunksBecauseOfLotsFree(PGMM pGMM)
2706{
2707 /*
2708 * Setting the limit at 16 chunks (32 MB) at the moment.
2709 */
2710 if (pGMM->PrivateX.cFreePages >= GMM_CHUNK_NUM_PAGES * 16)
2711 return true;
2712 return false;
2713}
2714
2715
2716/**
2717 * Common worker for GMMR0AllocateHandyPages and GMMR0AllocatePages.
2718 *
2719 * @returns VBox status code:
2720 * @retval VINF_SUCCESS on success.
2721 * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2722 * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2723 * that is we're trying to allocate more than we've reserved.
2724 *
2725 * @param pGMM Pointer to the GMM instance data.
2726 * @param pGVM Pointer to the VM.
2727 * @param cPages The number of pages to allocate.
2728 * @param paPages Pointer to the page descriptors. See GMMPAGEDESC for
2729 * details on what is expected on input.
2730 * @param enmAccount The account to charge.
2731 *
2732 * @remarks Caller owns the giant GMM lock.
2733 */
2734static int gmmR0AllocatePagesNew(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
2735{
2736 Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
2737
2738 /*
2739 * Check allocation limits.
2740 */
2741 if (RT_LIKELY(pGMM->cAllocatedPages + cPages <= pGMM->cMaxPages))
2742 { /* likely */ }
2743 else
2744 return VERR_GMM_HIT_GLOBAL_LIMIT;
2745
2746 switch (enmAccount)
2747 {
2748 case GMMACCOUNT_BASE:
2749 if (RT_LIKELY( pGVM->gmm.s.Stats.Allocated.cBasePages + pGVM->gmm.s.Stats.cBalloonedPages + cPages
2750 <= pGVM->gmm.s.Stats.Reserved.cBasePages))
2751 { /* likely */ }
2752 else
2753 {
2754 Log(("gmmR0AllocatePages:Base: Reserved=%#llx Allocated+Ballooned+Requested=%#llx+%#llx+%#x!\n",
2755 pGVM->gmm.s.Stats.Reserved.cBasePages, pGVM->gmm.s.Stats.Allocated.cBasePages,
2756 pGVM->gmm.s.Stats.cBalloonedPages, cPages));
2757 return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2758 }
2759 break;
2760 case GMMACCOUNT_SHADOW:
2761 if (RT_LIKELY(pGVM->gmm.s.Stats.Allocated.cShadowPages + cPages <= pGVM->gmm.s.Stats.Reserved.cShadowPages))
2762 { /* likely */ }
2763 else
2764 {
2765 Log(("gmmR0AllocatePages:Shadow: Reserved=%#x Allocated+Requested=%#x+%#x!\n",
2766 pGVM->gmm.s.Stats.Reserved.cShadowPages, pGVM->gmm.s.Stats.Allocated.cShadowPages, cPages));
2767 return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2768 }
2769 break;
2770 case GMMACCOUNT_FIXED:
2771 if (RT_LIKELY(pGVM->gmm.s.Stats.Allocated.cFixedPages + cPages <= pGVM->gmm.s.Stats.Reserved.cFixedPages))
2772 { /* likely */ }
2773 else
2774 {
2775 Log(("gmmR0AllocatePages:Fixed: Reserved=%#x Allocated+Requested=%#x+%#x!\n",
2776 pGVM->gmm.s.Stats.Reserved.cFixedPages, pGVM->gmm.s.Stats.Allocated.cFixedPages, cPages));
2777 return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2778 }
2779 break;
2780 default:
2781 AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_IPE_NOT_REACHED_DEFAULT_CASE);
2782 }
2783
2784 /*
2785 * Update the accounts before we proceed because we might be leaving the
2786 * protection of the global mutex and thus run the risk of permitting
2787 * too much memory to be allocated.
2788 */
2789 switch (enmAccount)
2790 {
2791 case GMMACCOUNT_BASE: pGVM->gmm.s.Stats.Allocated.cBasePages += cPages; break;
2792 case GMMACCOUNT_SHADOW: pGVM->gmm.s.Stats.Allocated.cShadowPages += cPages; break;
2793 case GMMACCOUNT_FIXED: pGVM->gmm.s.Stats.Allocated.cFixedPages += cPages; break;
2794 default: AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_IPE_NOT_REACHED_DEFAULT_CASE);
2795 }
2796 pGVM->gmm.s.Stats.cPrivatePages += cPages;
2797 pGMM->cAllocatedPages += cPages;
2798
2799 /*
2800 * Bound mode is also relatively straightforward.
2801 */
2802 uint32_t iPage = 0;
2803 int rc = VINF_SUCCESS;
2804 if (pGMM->fBoundMemoryMode)
2805 {
2806 iPage = gmmR0AllocatePagesInBoundMode(pGVM, iPage, cPages, paPages);
2807 if (iPage < cPages)
2808 do
2809 rc = gmmR0AllocateChunkNew(pGMM, pGVM, &pGVM->gmm.s.Private, cPages, paPages, &iPage);
2810 while (iPage < cPages && RT_SUCCESS(rc));
2811 }
2812 /*
2813     * Shared mode is trickier as we should try to achieve the same locality as
2814 * in bound mode, but smartly make use of non-full chunks allocated by
2815 * other VMs if we're low on memory.
2816 */
2817 else
2818 {
2819 RTUID const uidSelf = SUPR0GetSessionUid(pGVM->pSession);
2820
2821 /* Pick the most optimal pages first. */
2822 iPage = gmmR0AllocatePagesAssociatedWithVM(pGMM, pGVM, &pGMM->PrivateX, iPage, cPages, paPages);
2823 if (iPage < cPages)
2824 {
2825 /* Maybe we should try getting pages from chunks "belonging" to
2826 other VMs before allocating more chunks? */
2827 bool fTriedOnSameAlready = false;
2828 if (gmmR0ShouldAllocatePagesInOtherChunksBecauseOfLimits(pGVM))
2829 {
2830 iPage = gmmR0AllocatePagesFromSameNode(&pGMM->PrivateX, pGVM, uidSelf, iPage, cPages, paPages);
2831 fTriedOnSameAlready = true;
2832 }
2833
2834 /* Allocate memory from empty chunks. */
2835 if (iPage < cPages)
2836 iPage = gmmR0AllocatePagesFromEmptyChunksOnSameNode(&pGMM->PrivateX, pGVM, uidSelf, iPage, cPages, paPages);
2837
2838 /* Grab empty shared chunks. */
2839 if (iPage < cPages)
2840 iPage = gmmR0AllocatePagesFromEmptyChunksOnSameNode(&pGMM->Shared, pGVM, uidSelf, iPage, cPages, paPages);
2841
2842             /* If there are a lot of free pages spread around, try not to waste
2843                system memory on more chunks. (Should trigger defragmentation.) */
2844 if ( !fTriedOnSameAlready
2845 && gmmR0ShouldAllocatePagesInOtherChunksBecauseOfLotsFree(pGMM))
2846 {
2847 iPage = gmmR0AllocatePagesFromSameNode(&pGMM->PrivateX, pGVM, uidSelf, iPage, cPages, paPages);
2848 if (iPage < cPages)
2849 iPage = gmmR0AllocatePagesIndiscriminately(&pGMM->PrivateX, pGVM, uidSelf, iPage, cPages, paPages);
2850 }
2851
2852 /*
2853 * Ok, try allocate new chunks.
2854 */
2855 if (iPage < cPages)
2856 {
2857 do
2858 rc = gmmR0AllocateChunkNew(pGMM, pGVM, &pGMM->PrivateX, cPages, paPages, &iPage);
2859 while (iPage < cPages && RT_SUCCESS(rc));
2860
2861#if 0 /* We cannot mix chunks with different UIDs. */
2862 /* If the host is out of memory, take whatever we can get. */
2863 if ( (rc == VERR_NO_MEMORY || rc == VERR_NO_PHYS_MEMORY)
2864 && pGMM->PrivateX.cFreePages + pGMM->Shared.cFreePages >= cPages - iPage)
2865 {
2866 iPage = gmmR0AllocatePagesIndiscriminately(&pGMM->PrivateX, pGVM, iPage, cPages, paPages);
2867 if (iPage < cPages)
2868 iPage = gmmR0AllocatePagesIndiscriminately(&pGMM->Shared, pGVM, iPage, cPages, paPages);
2869 AssertRelease(iPage == cPages);
2870 rc = VINF_SUCCESS;
2871 }
2872#endif
2873 }
2874 }
2875 }
2876
2877 /*
2878 * Clean up on failure. Since this is bound to be a low-memory condition
2879 * we will give back any empty chunks that might be hanging around.
2880 */
2881 if (RT_SUCCESS(rc))
2882 { /* likely */ }
2883 else
2884 {
2885 /* Update the statistics. */
2886 pGVM->gmm.s.Stats.cPrivatePages -= cPages;
2887 pGMM->cAllocatedPages -= cPages - iPage;
2888 switch (enmAccount)
2889 {
2890 case GMMACCOUNT_BASE: pGVM->gmm.s.Stats.Allocated.cBasePages -= cPages; break;
2891 case GMMACCOUNT_SHADOW: pGVM->gmm.s.Stats.Allocated.cShadowPages -= cPages; break;
2892 case GMMACCOUNT_FIXED: pGVM->gmm.s.Stats.Allocated.cFixedPages -= cPages; break;
2893 default: AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_IPE_NOT_REACHED_DEFAULT_CASE);
2894 }
2895
2896 /* Release the pages. */
2897 while (iPage-- > 0)
2898 {
2899 uint32_t idPage = paPages[iPage].idPage;
2900 PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
2901 if (RT_LIKELY(pPage))
2902 {
2903 Assert(GMM_PAGE_IS_PRIVATE(pPage));
2904 Assert(pPage->Private.hGVM == pGVM->hSelf);
2905 gmmR0FreePrivatePage(pGMM, pGVM, idPage, pPage);
2906 }
2907 else
2908 AssertMsgFailed(("idPage=%#x\n", idPage));
2909
2910 paPages[iPage].idPage = NIL_GMM_PAGEID;
2911 paPages[iPage].idSharedPage = NIL_GMM_PAGEID;
2912 paPages[iPage].HCPhysGCPhys = NIL_GMMPAGEDESC_PHYS;
2913 paPages[iPage].fZeroed = false;
2914 }
2915
2916 /* Free empty chunks. */
2917 /** @todo */
2918
2919 /* return the fail status on failure */
2920 return rc;
2921 }
2922 return VINF_SUCCESS;
2923}
2924
2925
2926/**
2927 * Updates the previous allocations and allocates more pages.
2928 *
2929 * The handy pages are always taken from the 'base' memory account.
2930 * The allocated pages are not cleared and will contain random garbage.
2931 *
2932 * @returns VBox status code:
2933 * @retval VINF_SUCCESS on success.
2934 * @retval VERR_NOT_OWNER if the caller is not an EMT.
2935 * @retval VERR_GMM_PAGE_NOT_FOUND if one of the pages to update wasn't found.
2936 * @retval VERR_GMM_PAGE_NOT_PRIVATE if one of the pages to update wasn't a
2937 * private page.
2938 * @retval VERR_GMM_PAGE_NOT_SHARED if one of the pages to update wasn't a
2939 * shared page.
2940 * @retval VERR_GMM_NOT_PAGE_OWNER if one of the pages to be updated wasn't
2941 * owned by the VM.
2942 * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2943 * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2944 * that is we're trying to allocate more than we've reserved.
2945 *
2946 * @param pGVM The global (ring-0) VM structure.
2947 * @param idCpu The VCPU id.
2948 * @param cPagesToUpdate The number of pages to update (starting from the head).
2949 * @param cPagesToAlloc The number of pages to allocate (starting from the head).
2950 * @param paPages The array of page descriptors.
2951 * See GMMPAGEDESC for details on what is expected on input.
2952 * @thread EMT(idCpu)
2953 */
2954GMMR0DECL(int) GMMR0AllocateHandyPages(PGVM pGVM, VMCPUID idCpu, uint32_t cPagesToUpdate,
2955 uint32_t cPagesToAlloc, PGMMPAGEDESC paPages)
2956{
2957 LogFlow(("GMMR0AllocateHandyPages: pGVM=%p cPagesToUpdate=%#x cPagesToAlloc=%#x paPages=%p\n",
2958 pGVM, cPagesToUpdate, cPagesToAlloc, paPages));
2959
2960 /*
2961 * Validate & get basics.
2962 * (This is a relatively busy path, so make predictions where possible.)
2963 */
2964 PGMM pGMM;
2965 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
2966 int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
2967 if (RT_FAILURE(rc))
2968 return rc;
2969
2970 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2971 AssertMsgReturn( (cPagesToUpdate && cPagesToUpdate < 1024)
2972 || (cPagesToAlloc && cPagesToAlloc < 1024),
2973 ("cPagesToUpdate=%#x cPagesToAlloc=%#x\n", cPagesToUpdate, cPagesToAlloc),
2974 VERR_INVALID_PARAMETER);
2975
2976 unsigned iPage = 0;
2977 for (; iPage < cPagesToUpdate; iPage++)
2978 {
2979 AssertMsgReturn( ( paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2980 && !(paPages[iPage].HCPhysGCPhys & GUEST_PAGE_OFFSET_MASK))
2981 || paPages[iPage].HCPhysGCPhys == NIL_GMMPAGEDESC_PHYS
2982 || paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE,
2983 ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys),
2984 VERR_INVALID_PARAMETER);
2985 /* ignore fZeroed here */
2986 AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2987 /*|| paPages[iPage].idPage == NIL_GMM_PAGEID*/,
2988 ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2989 AssertMsgReturn( paPages[iPage].idSharedPage == NIL_GMM_PAGEID
2990 || paPages[iPage].idSharedPage <= GMM_PAGEID_LAST,
2991 ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2992 }
2993
2994 for (; iPage < cPagesToAlloc; iPage++)
2995 {
2996 AssertMsgReturn(paPages[iPage].HCPhysGCPhys == NIL_GMMPAGEDESC_PHYS, ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys), VERR_INVALID_PARAMETER);
2997 AssertMsgReturn(paPages[iPage].fZeroed == false, ("#%#x: %#x\n", iPage, paPages[iPage].fZeroed), VERR_INVALID_PARAMETER);
2998 AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2999 AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
3000 }
3001
3002 /*
3003 * Take the semaphore
3004 */
3005 VMMR0EMTBLOCKCTX Ctx;
3006 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
3007 rc = VMMR0EmtPrepareToBlock(pGVCpu, VINF_SUCCESS, "GMMR0AllocateHandyPages", pGMM, &Ctx);
3008 AssertRCReturn(rc, rc);
3009
3010 rc = gmmR0MutexAcquire(pGMM);
3011 if ( RT_SUCCESS(rc)
3012 && GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3013 {
3014 /* No allocations before the initial reservation has been made! */
3015 if (RT_LIKELY( pGVM->gmm.s.Stats.Reserved.cBasePages
3016 && pGVM->gmm.s.Stats.Reserved.cFixedPages
3017 && pGVM->gmm.s.Stats.Reserved.cShadowPages))
3018 {
3019 /*
3020 * Perform the updates.
3021 * Stop on the first error.
3022 */
3023 for (iPage = 0; iPage < cPagesToUpdate; iPage++)
3024 {
3025 if (paPages[iPage].idPage != NIL_GMM_PAGEID)
3026 {
3027 PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idPage);
3028 if (RT_LIKELY(pPage))
3029 {
3030 if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
3031 {
3032 if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
3033 {
3034 AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
3035 if (RT_LIKELY(paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST))
3036 pPage->Private.pfn = paPages[iPage].HCPhysGCPhys >> GUEST_PAGE_SHIFT;
3037 else if (paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE)
3038 pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE;
3039 /* else: NIL_RTHCPHYS nothing */
3040
3041 paPages[iPage].idPage = NIL_GMM_PAGEID;
3042 paPages[iPage].HCPhysGCPhys = NIL_GMMPAGEDESC_PHYS;
3043 paPages[iPage].fZeroed = false;
3044 }
3045 else
3046 {
3047 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not owner! hGVM=%#x hSelf=%#x\n",
3048 iPage, paPages[iPage].idPage, pPage->Private.hGVM, pGVM->hSelf));
3049 rc = VERR_GMM_NOT_PAGE_OWNER;
3050 break;
3051 }
3052 }
3053 else
3054 {
3055 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not private! %.*Rhxs (type %d)\n", iPage, paPages[iPage].idPage, sizeof(*pPage), pPage, pPage->Common.u2State));
3056 rc = VERR_GMM_PAGE_NOT_PRIVATE;
3057 break;
3058 }
3059 }
3060 else
3061 {
3062 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (private)\n", iPage, paPages[iPage].idPage));
3063 rc = VERR_GMM_PAGE_NOT_FOUND;
3064 break;
3065 }
3066 }
3067
3068 if (paPages[iPage].idSharedPage == NIL_GMM_PAGEID)
3069 { /* likely */ }
3070 else
3071 {
3072 PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idSharedPage);
3073 if (RT_LIKELY(pPage))
3074 {
3075 if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
3076 {
3077 AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
3078 Assert(pPage->Shared.cRefs);
3079 Assert(pGVM->gmm.s.Stats.cSharedPages);
3080 Assert(pGVM->gmm.s.Stats.Allocated.cBasePages);
3081
3082 Log(("GMMR0AllocateHandyPages: free shared page %x cRefs=%d\n", paPages[iPage].idSharedPage, pPage->Shared.cRefs));
3083 pGVM->gmm.s.Stats.cSharedPages--;
3084 pGVM->gmm.s.Stats.Allocated.cBasePages--;
3085 if (!--pPage->Shared.cRefs)
3086 gmmR0FreeSharedPage(pGMM, pGVM, paPages[iPage].idSharedPage, pPage);
3087 else
3088 {
3089 Assert(pGMM->cDuplicatePages);
3090 pGMM->cDuplicatePages--;
3091 }
3092
3093 paPages[iPage].idSharedPage = NIL_GMM_PAGEID;
3094 }
3095 else
3096 {
3097 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not shared!\n", iPage, paPages[iPage].idSharedPage));
3098 rc = VERR_GMM_PAGE_NOT_SHARED;
3099 break;
3100 }
3101 }
3102 else
3103 {
3104 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (shared)\n", iPage, paPages[iPage].idSharedPage));
3105 rc = VERR_GMM_PAGE_NOT_FOUND;
3106 break;
3107 }
3108 }
3109 } /* for each page to update */
3110
3111 if (RT_SUCCESS(rc) && cPagesToAlloc > 0)
3112 {
3113#ifdef VBOX_STRICT
3114 for (iPage = 0; iPage < cPagesToAlloc; iPage++)
3115 {
3116 Assert(paPages[iPage].HCPhysGCPhys == NIL_GMMPAGEDESC_PHYS);
3117 Assert(paPages[iPage].fZeroed == false);
3118 Assert(paPages[iPage].idPage == NIL_GMM_PAGEID);
3119 Assert(paPages[iPage].idSharedPage == NIL_GMM_PAGEID);
3120 }
3121#endif
3122
3123 /*
3124 * Join paths with GMMR0AllocatePages for the allocation.
3125                  * Note! gmmR0AllocateChunkNew may leave the protection of the mutex!
3126 */
3127 rc = gmmR0AllocatePagesNew(pGMM, pGVM, cPagesToAlloc, paPages, GMMACCOUNT_BASE);
3128 }
3129 }
3130 else
3131 rc = VERR_WRONG_ORDER;
3132 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3133 gmmR0MutexRelease(pGMM);
3134 }
3135 else if (RT_SUCCESS(rc))
3136 {
3137 gmmR0MutexRelease(pGMM);
3138 rc = VERR_GMM_IS_NOT_SANE;
3139 }
3140 VMMR0EmtResumeAfterBlocking(pGVCpu, &Ctx);
3141
3142 LogFlow(("GMMR0AllocateHandyPages: returns %Rrc\n", rc));
3143 return rc;
3144}
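
/*
 * Sketch of how a caller prepares the descriptors for the pure allocation case
 * of GMMR0AllocateHandyPages (derived from the input checks above; the array
 * size is arbitrary and the surrounding PGM/ring-3 plumbing is not shown):
 *
 * @code
 *      GMMPAGEDESC aPages[32];
 *      for (uint32_t i = 0; i < RT_ELEMENTS(aPages); i++)
 *      {
 *          aPages[i].HCPhysGCPhys = NIL_GMMPAGEDESC_PHYS;  // no guest address assigned yet
 *          aPages[i].fZeroed      = false;
 *          aPages[i].idPage       = NIL_GMM_PAGEID;
 *          aPages[i].idSharedPage = NIL_GMM_PAGEID;
 *      }
 *      // cPagesToUpdate = 0: nothing to update, just allocate.
 *      int rc = GMMR0AllocateHandyPages(pGVM, idCpu, 0, RT_ELEMENTS(aPages), aPages);
 *      // On success each entry has idPage, HCPhysGCPhys and fZeroed filled in.
 * @endcode
 */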
3145
3146
3147/**
3148 * Allocate one or more pages.
3149 *
3150 * This is typically used for ROMs and MMIO2 (VRAM) during VM creation.
3151 * The allocated pages are not cleared and will contain random garbage.
3152 *
3153 * @returns VBox status code:
3154 * @retval VINF_SUCCESS on success.
3155 * @retval VERR_NOT_OWNER if the caller is not an EMT.
3156 * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
3157 * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
3158 * that is we're trying to allocate more than we've reserved.
3159 *
3160 * @param pGVM The global (ring-0) VM structure.
3161 * @param idCpu The VCPU id.
3162 * @param cPages The number of pages to allocate.
3163 * @param paPages Pointer to the page descriptors.
3164 * See GMMPAGEDESC for details on what is expected on
3165 * input.
3166 * @param enmAccount The account to charge.
3167 *
3168 * @thread EMT.
3169 */
3170GMMR0DECL(int) GMMR0AllocatePages(PGVM pGVM, VMCPUID idCpu, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
3171{
3172 LogFlow(("GMMR0AllocatePages: pGVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pGVM, cPages, paPages, enmAccount));
3173
3174 /*
3175 * Validate, get basics and take the semaphore.
3176 */
3177 PGMM pGMM;
3178 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
3179 int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
3180 if (RT_FAILURE(rc))
3181 return rc;
3182
3183 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
3184 AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
3185 AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - GUEST_PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
3186
3187 for (unsigned iPage = 0; iPage < cPages; iPage++)
3188 {
3189 AssertMsgReturn( paPages[iPage].HCPhysGCPhys == NIL_GMMPAGEDESC_PHYS
3190 || paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE
3191 || ( enmAccount == GMMACCOUNT_BASE
3192 && paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
3193 && !(paPages[iPage].HCPhysGCPhys & GUEST_PAGE_OFFSET_MASK)),
3194 ("#%#x: %RHp enmAccount=%d\n", iPage, paPages[iPage].HCPhysGCPhys, enmAccount),
3195 VERR_INVALID_PARAMETER);
3196 AssertMsgReturn(paPages[iPage].fZeroed == false, ("#%#x: %#x\n", iPage, paPages[iPage].fZeroed), VERR_INVALID_PARAMETER);
3197 AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
3198 AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
3199 }
3200
3201 /*
3202 * Grab the giant mutex and get working.
3203 */
3204 gmmR0MutexAcquire(pGMM);
3205 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3206 {
3207
3208 /* No allocations before the initial reservation has been made! */
3209 if (RT_LIKELY( pGVM->gmm.s.Stats.Reserved.cBasePages
3210 && pGVM->gmm.s.Stats.Reserved.cFixedPages
3211 && pGVM->gmm.s.Stats.Reserved.cShadowPages))
3212 rc = gmmR0AllocatePagesNew(pGMM, pGVM, cPages, paPages, enmAccount);
3213 else
3214 rc = VERR_WRONG_ORDER;
3215 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3216 }
3217 else
3218 rc = VERR_GMM_IS_NOT_SANE;
3219 gmmR0MutexRelease(pGMM);
3220
3221 LogFlow(("GMMR0AllocatePages: returns %Rrc\n", rc));
3222 return rc;
3223}
3224
3225
3226/**
3227 * VMMR0 request wrapper for GMMR0AllocatePages.
3228 *
3229 * @returns see GMMR0AllocatePages.
3230 * @param pGVM The global (ring-0) VM structure.
3231 * @param idCpu The VCPU id.
3232 * @param pReq Pointer to the request packet.
3233 */
3234GMMR0DECL(int) GMMR0AllocatePagesReq(PGVM pGVM, VMCPUID idCpu, PGMMALLOCATEPAGESREQ pReq)
3235{
3236 /*
3237 * Validate input and pass it on.
3238 */
3239 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3240 AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0]),
3241 ("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0])),
3242 VERR_INVALID_PARAMETER);
3243 AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF_DYN(GMMALLOCATEPAGESREQ, aPages[pReq->cPages]),
3244 ("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF_DYN(GMMALLOCATEPAGESREQ, aPages[pReq->cPages])),
3245 VERR_INVALID_PARAMETER);
3246
3247 return GMMR0AllocatePages(pGVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
3248}
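
/*
 * Example of sizing and filling the variable length request validated above
 * (a sketch only; the request header magic constant is an assumption and the
 * actual ring-3 allocation/dispatch of the packet is not shown):
 *
 * @code
 *      uint32_t const       cPages = 8;
 *      uint32_t const       cbReq  = RT_UOFFSETOF_DYN(GMMALLOCATEPAGESREQ, aPages[cPages]);
 *      PGMMALLOCATEPAGESREQ pReq   = (PGMMALLOCATEPAGESREQ)RTMemAllocZ(cbReq);
 *      if (pReq)
 *      {
 *          pReq->Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;      // assumed request header magic
 *          pReq->Hdr.cbReq    = cbReq;
 *          pReq->cPages       = cPages;
 *          pReq->enmAccount   = GMMACCOUNT_BASE;
 *          for (uint32_t i = 0; i < cPages; i++)
 *          {
 *              pReq->aPages[i].HCPhysGCPhys = NIL_GMMPAGEDESC_PHYS;
 *              pReq->aPages[i].fZeroed      = false;
 *              pReq->aPages[i].idPage       = NIL_GMM_PAGEID;
 *              pReq->aPages[i].idSharedPage = NIL_GMM_PAGEID;
 *          }
 *          // ... pass pReq to ring-0, then read back aPages[i].idPage / HCPhysGCPhys ...
 *          RTMemFree(pReq);
 *      }
 * @endcode
 */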
3249
3250
3251/**
3252 * Allocate a large page to represent guest RAM
3253 *
3254 * The allocated pages are zeroed upon return.
3255 *
3256 * @returns VBox status code:
3257 * @retval VINF_SUCCESS on success.
3258 * @retval VERR_NOT_OWNER if the caller is not an EMT.
3259 * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
3260 * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
3261 * that is we're trying to allocate more than we've reserved.
3262 * @retval VERR_TRY_AGAIN if the host is temporarily out of large pages.
3263 * @returns see GMMR0AllocatePages.
3264 *
3265 * @param pGVM The global (ring-0) VM structure.
3266 * @param idCpu The VCPU id.
3267 * @param cbPage Large page size.
3268 * @param pIdPage Where to return the GMM page ID of the page.
3269 * @param pHCPhys Where to return the host physical address of the page.
3270 */
3271GMMR0DECL(int) GMMR0AllocateLargePage(PGVM pGVM, VMCPUID idCpu, uint32_t cbPage, uint32_t *pIdPage, RTHCPHYS *pHCPhys)
3272{
3273 LogFlow(("GMMR0AllocateLargePage: pGVM=%p cbPage=%x\n", pGVM, cbPage));
3274
3275 AssertPtrReturn(pIdPage, VERR_INVALID_PARAMETER);
3276 *pIdPage = NIL_GMM_PAGEID;
3277 AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER);
3278 *pHCPhys = NIL_RTHCPHYS;
3279 AssertReturn(cbPage == GMM_CHUNK_SIZE, VERR_INVALID_PARAMETER);
3280
3281 /*
3282 * Validate GVM + idCpu, get basics and take the semaphore.
3283 */
3284 PGMM pGMM;
3285 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
3286 int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
3287 AssertRCReturn(rc, rc);
3288
3289 VMMR0EMTBLOCKCTX Ctx;
3290 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
3291 rc = VMMR0EmtPrepareToBlock(pGVCpu, VINF_SUCCESS, "GMMR0AllocateLargePage", pGMM, &Ctx);
3292 AssertRCReturn(rc, rc);
3293
3294 rc = gmmR0MutexAcquire(pGMM);
3295 if (RT_SUCCESS(rc))
3296 {
3297 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3298 {
3299 /*
3300 * Check the quota.
3301 */
3302 /** @todo r=bird: Quota checking could be done w/o the giant mutex but using
3303 * a VM specific mutex... */
3304 if (RT_LIKELY( pGVM->gmm.s.Stats.Allocated.cBasePages + pGVM->gmm.s.Stats.cBalloonedPages + GMM_CHUNK_NUM_PAGES
3305 <= pGVM->gmm.s.Stats.Reserved.cBasePages))
3306 {
3307 /*
3308 * Allocate a new large page chunk.
3309 *
3310 * Note! We leave the giant GMM lock temporarily as the allocation might
3311 * take a long time. gmmR0RegisterChunk will retake it (ugly).
3312 */
3313 AssertCompile(GMM_CHUNK_SIZE == _2M);
3314 gmmR0MutexRelease(pGMM);
3315
3316 RTR0MEMOBJ hMemObj;
3317 rc = RTR0MemObjAllocLarge(&hMemObj, GMM_CHUNK_SIZE, GMM_CHUNK_SIZE, RTMEMOBJ_ALLOC_LARGE_F_FAST);
3318 if (RT_SUCCESS(rc))
3319 {
3320 *pHCPhys = RTR0MemObjGetPagePhysAddr(hMemObj, 0);
3321
3322 /*
3323 * Register the chunk as fully allocated.
3324 * Note! As mentioned above, this will return owning the mutex on success.
3325 */
3326 PGMMCHUNK pChunk = NULL;
3327 PGMMCHUNKFREESET const pSet = pGMM->fBoundMemoryMode ? &pGVM->gmm.s.Private : &pGMM->PrivateX;
3328 rc = gmmR0RegisterChunk(pGMM, pSet, hMemObj, pGVM->hSelf, pGVM->pSession, GMM_CHUNK_FLAGS_LARGE_PAGE,
3329 0 /*cPages*/, NULL /*paPages*/, NULL /*piPage*/, &pChunk);
3330 if (RT_SUCCESS(rc))
3331 {
3332 /*
3333 * The gmmR0RegisterChunk call already marked all pages allocated,
3334 * so we just have to fill in the return values and update stats now.
3335 */
3336 *pIdPage = pChunk->Core.Key << GMM_CHUNKID_SHIFT;
3337
3338 /* Update accounting. */
3339 pGVM->gmm.s.Stats.Allocated.cBasePages += GMM_CHUNK_NUM_PAGES;
3340 pGVM->gmm.s.Stats.cPrivatePages += GMM_CHUNK_NUM_PAGES;
3341 pGMM->cAllocatedPages += GMM_CHUNK_NUM_PAGES;
3342
3343 gmmR0LinkChunk(pChunk, pSet);
3344 gmmR0MutexRelease(pGMM);
3345
3346 VMMR0EmtResumeAfterBlocking(pGVCpu, &Ctx);
3347 LogFlow(("GMMR0AllocateLargePage: returns VINF_SUCCESS\n"));
3348 return VINF_SUCCESS;
3349 }
3350
3351 /*
3352 * Bail out.
3353 */
3354 RTR0MemObjFree(hMemObj, true /* fFreeMappings */);
3355 *pHCPhys = NIL_RTHCPHYS;
3356 }
3357 /** @todo r=bird: Turn VERR_NO_MEMORY etc into VERR_TRY_AGAIN? Docs say we
3358 * return it, but I am sure IPRT doesn't... */
3359 }
3360 else
3361 {
3362 Log(("GMMR0AllocateLargePage: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
3363 pGVM->gmm.s.Stats.Reserved.cBasePages, pGVM->gmm.s.Stats.Allocated.cBasePages, GMM_CHUNK_NUM_PAGES));
3364 gmmR0MutexRelease(pGMM);
3365 rc = VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
3366 }
3367 }
3368 else
3369 {
3370 gmmR0MutexRelease(pGMM);
3371 rc = VERR_GMM_IS_NOT_SANE;
3372 }
3373 }
3374
3375 VMMR0EmtResumeAfterBlocking(pGVCpu, &Ctx);
3376 LogFlow(("GMMR0AllocateLargePage: returns %Rrc\n", rc));
3377 return rc;
3378}
3379
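/*
 * Illustrative call sketch for GMMR0AllocateLargePage above (pGVM and idCpu
 * are placeholders; note that cbPage must be exactly GMM_CHUNK_SIZE or the
 * call fails with VERR_INVALID_PARAMETER):
 * @code
 *      uint32_t idLargePage = NIL_GMM_PAGEID;
 *      RTHCPHYS HCPhysLarge = NIL_RTHCPHYS;
 *      int rc = GMMR0AllocateLargePage(pGVM, idCpu, GMM_CHUNK_SIZE, &idLargePage, &HCPhysLarge);
 *      if (RT_SUCCESS(rc))
 *      {
 *          // idLargePage identifies the 2 MB chunk backing the large page and
 *          // HCPhysLarge is the host physical address of its first byte.
 *      }
 *      else
 *      {
 *          // e.g. VERR_GMM_HIT_VM_ACCOUNT_LIMIT or an out-of-large-pages condition;
 *          // a caller would typically fall back to normal page allocations (caller policy).
 *      }
 * @endcode
 */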
3380
3381/**
3382 * Free a large page.
3383 *
3384 * @returns VBox status code:
3385 * @param pGVM The global (ring-0) VM structure.
3386 * @param idCpu The VCPU id.
3387 * @param idPage The large page id.
3388 */
3389GMMR0DECL(int) GMMR0FreeLargePage(PGVM pGVM, VMCPUID idCpu, uint32_t idPage)
3390{
3391 LogFlow(("GMMR0FreeLargePage: pGVM=%p idPage=%x\n", pGVM, idPage));
3392
3393 /*
3394 * Validate, get basics and take the semaphore.
3395 */
3396 PGMM pGMM;
3397 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
3398 int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
3399 if (RT_FAILURE(rc))
3400 return rc;
3401
3402 gmmR0MutexAcquire(pGMM);
3403 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3404 {
3405 const unsigned cPages = GMM_CHUNK_NUM_PAGES;
3406
3407 if (RT_UNLIKELY(pGVM->gmm.s.Stats.Allocated.cBasePages < cPages))
3408 {
3409 Log(("GMMR0FreeLargePage: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Stats.Allocated.cBasePages, cPages));
3410 gmmR0MutexRelease(pGMM);
3411 return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3412 }
3413
3414 PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
3415 if (RT_LIKELY( pPage
3416 && GMM_PAGE_IS_PRIVATE(pPage)))
3417 {
3418 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
3419 Assert(pChunk);
3420 Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
3421 Assert(pChunk->cPrivate > 0);
3422
3423 /* Release the memory immediately. */
3424 gmmR0FreeChunk(pGMM, NULL, pChunk, false /*fRelaxedSem*/); /** @todo this can be relaxed too! */
3425
3426 /* Update accounting. */
3427 pGVM->gmm.s.Stats.Allocated.cBasePages -= cPages;
3428 pGVM->gmm.s.Stats.cPrivatePages -= cPages;
3429 pGMM->cAllocatedPages -= cPages;
3430 }
3431 else
3432 rc = VERR_GMM_PAGE_NOT_FOUND;
3433 }
3434 else
3435 rc = VERR_GMM_IS_NOT_SANE;
3436
3437 gmmR0MutexRelease(pGMM);
3438 LogFlow(("GMMR0FreeLargePage: returns %Rrc\n", rc));
3439 return rc;
3440}
3441
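/*
 * Illustrative pairing with GMMR0AllocateLargePage further up: the page ID
 * returned there (idLargePage in the sketch above) is what gets freed here
 * (pGVM and idCpu are placeholders):
 * @code
 *      int rc = GMMR0FreeLargePage(pGVM, idCpu, idLargePage);
 *      // rc == VERR_GMM_PAGE_NOT_FOUND if idLargePage doesn't name a private page
 * @endcode
 */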
3442
3443/**
3444 * VMMR0 request wrapper for GMMR0FreeLargePage.
3445 *
3446 * @returns see GMMR0FreeLargePage.
3447 * @param pGVM The global (ring-0) VM structure.
3448 * @param idCpu The VCPU id.
3449 * @param pReq Pointer to the request packet.
3450 */
3451GMMR0DECL(int) GMMR0FreeLargePageReq(PGVM pGVM, VMCPUID idCpu, PGMMFREELARGEPAGEREQ pReq)
3452{
3453 /*
3454 * Validate input and pass it on.
3455 */
3456 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3457 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMFREEPAGESREQ),
3458 ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMFREEPAGESREQ)),
3459 VERR_INVALID_PARAMETER);
3460
3461 return GMMR0FreeLargePage(pGVM, idCpu, pReq->idPage);
3462}
3463
3464
3465/**
3466 * @callback_method_impl{FNGVMMR0ENUMCALLBACK,
3467 * Used by gmmR0FreeChunkFlushPerVmTlbs().}
3468 */
3469static DECLCALLBACK(int) gmmR0InvalidatePerVmChunkTlbCallback(PGVM pGVM, void *pvUser)
3470{
3471 RT_NOREF(pvUser);
3472 if (pGVM->gmm.s.hChunkTlbSpinLock != NIL_RTSPINLOCK)
3473 {
3474 RTSpinlockAcquire(pGVM->gmm.s.hChunkTlbSpinLock);
3475 uintptr_t i = RT_ELEMENTS(pGVM->gmm.s.aChunkTlbEntries);
3476 while (i-- > 0)
3477 {
3478 pGVM->gmm.s.aChunkTlbEntries[i].idGeneration = UINT64_MAX;
3479 pGVM->gmm.s.aChunkTlbEntries[i].pChunk = NULL;
3480 }
3481 RTSpinlockRelease(pGVM->gmm.s.hChunkTlbSpinLock);
3482 }
3483 return VINF_SUCCESS;
3484}
3485
3486
3487/**
3488 * Called by gmmR0FreeChunk when we reach the threshold for wrapping around the
3489 * free generation ID value.
3490 *
3491 * This is done at 2^62 - 1, which allows us to drop all locks, since it will
3492 * take a while before another 12 exa (2 305 843 009 213 693 952) calls to
3493 * gmmR0FreeChunk can be made and cause a real wrap-around. We do two
3494 * invalidation passes and reset the generation ID between them. This makes
3495 * sure there are no false positives.
3496 *
3497 * @param pGMM Pointer to the GMM instance.
3498 */
3499static void gmmR0FreeChunkFlushPerVmTlbs(PGMM pGMM)
3500{
3501 /*
3502 * First invalidation pass.
3503 */
3504 int rc = GVMMR0EnumVMs(gmmR0InvalidatePerVmChunkTlbCallback, NULL);
3505 AssertRCSuccess(rc);
3506
3507 /*
3508 * Reset the generation number.
3509 */
3510 RTSpinlockAcquire(pGMM->hSpinLockTree);
3511 ASMAtomicWriteU64(&pGMM->idFreeGeneration, 1);
3512 RTSpinlockRelease(pGMM->hSpinLockTree);
3513
3514 /*
3515 * Second invalidation pass.
3516 */
3517 rc = GVMMR0EnumVMs(gmmR0InvalidatePerVmChunkTlbCallback, NULL);
3518 AssertRCSuccess(rc);
3519}
3520
3521
3522/**
3523 * Frees a chunk, giving it back to the host OS.
3524 *
3525 * @param pGMM Pointer to the GMM instance.
3526 * @param pGVM This is set when called from GMMR0CleanupVM so we can
3527 * unmap and free the chunk in one go.
3528 * @param pChunk The chunk to free.
3529 * @param fRelaxedSem Whether we can release the semaphore while doing the
3530 * freeing (@c true) or not.
3531 */
3532static bool gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem)
3533{
3534 Assert(pChunk->Core.Key != NIL_GMM_CHUNKID);
3535
3536 GMMR0CHUNKMTXSTATE MtxState;
3537 gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
3538
3539 /*
3540 * Cleanup hack! Unmap the chunk from the caller's address space.
3541 * This shouldn't happen, so screw lock contention...
3542 */
3543 if (pChunk->cMappingsX && pGVM)
3544 gmmR0UnmapChunkLocked(pGMM, pGVM, pChunk);
3545
3546 /*
3547 * If there are current mappings of the chunk, then request the
3548 * VMs to unmap them. Reposition the chunk in the free list so
3549 * it won't be a likely candidate for allocations.
3550 */
3551 if (pChunk->cMappingsX)
3552 {
3553 /** @todo R0 -> VM request */
3554 /* The chunk can be mapped by more than one VM if fBoundMemoryMode is false! */
3555 Log(("gmmR0FreeChunk: chunk still has %d mappings; don't free!\n", pChunk->cMappingsX));
3556 gmmR0ChunkMutexRelease(&MtxState, pChunk);
3557 return false;
3558 }
3559
3560
3561 /*
3562 * Save and trash the handle.
3563 */
3564 RTR0MEMOBJ const hMemObj = pChunk->hMemObj;
3565 pChunk->hMemObj = NIL_RTR0MEMOBJ;
3566
3567 /*
3568 * Unlink it from everywhere.
3569 */
3570 gmmR0UnlinkChunk(pChunk);
3571
3572 RTSpinlockAcquire(pGMM->hSpinLockTree);
3573
3574 RTListNodeRemove(&pChunk->ListNode);
3575
3576 PAVLU32NODECORE pCore = RTAvlU32Remove(&pGMM->pChunks, pChunk->Core.Key);
3577 Assert(pCore == &pChunk->Core); NOREF(pCore);
3578
3579 PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(pChunk->Core.Key)];
3580 if (pTlbe->pChunk == pChunk)
3581 {
3582 pTlbe->idChunk = NIL_GMM_CHUNKID;
3583 pTlbe->pChunk = NULL;
3584 }
3585
3586 Assert(pGMM->cChunks > 0);
3587 pGMM->cChunks--;
3588
3589 uint64_t const idFreeGeneration = ASMAtomicIncU64(&pGMM->idFreeGeneration);
3590
3591 RTSpinlockRelease(pGMM->hSpinLockTree);
3592
3593 pGMM->cFreedChunks++;
3594
3595 /* Drop the lock. */
3596 gmmR0ChunkMutexRelease(&MtxState, NULL);
3597 if (fRelaxedSem)
3598 gmmR0MutexRelease(pGMM);
3599
3600 /*
3601 * Flush per VM chunk TLBs if we're getting remotely close to a generation wraparound.
3602 */
3603 if (idFreeGeneration == UINT64_MAX / 4)
3604 gmmR0FreeChunkFlushPerVmTlbs(pGMM);
3605
3606 /*
3607 * Free the Chunk ID and all memory associated with the chunk.
3608 */
3609 gmmR0FreeChunkId(pGMM, pChunk->Core.Key);
3610 pChunk->Core.Key = NIL_GMM_CHUNKID;
3611
3612 RTMemFree(pChunk->paMappingsX);
3613 pChunk->paMappingsX = NULL;
3614
3615 RTMemFree(pChunk);
3616
3617#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
3618 int rc = RTR0MemObjFree(hMemObj, true /* fFreeMappings */);
3619#else
3620 int rc = RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
3621#endif
3622 AssertLogRelRC(rc);
3623
3624 if (fRelaxedSem)
3625 gmmR0MutexAcquire(pGMM);
3626 return fRelaxedSem;
3627}
3628
3629
3630/**
3631 * Free page worker.
3632 *
3633 * The caller does all the statistics decrementing; we do all the incrementing.
3634 *
3635 * @param pGMM Pointer to the GMM instance data.
3636 * @param pGVM Pointer to the GVM instance.
3637 * @param pChunk Pointer to the chunk this page belongs to.
3638 * @param idPage The Page ID.
3639 * @param pPage Pointer to the page.
3640 */
3641static void gmmR0FreePageWorker(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, uint32_t idPage, PGMMPAGE pPage)
3642{
3643 Log3(("F pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x\n",
3644 pPage, pPage - &pChunk->aPages[0], idPage, pPage->Common.u2State, pChunk->iFreeHead)); NOREF(idPage);
3645
3646 /*
3647 * Put the page on the free list.
3648 */
3649 pPage->u = 0;
3650 pPage->Free.u2State = GMM_PAGE_STATE_FREE;
3651 pPage->Free.fZeroed = false;
3652 Assert(pChunk->iFreeHead < RT_ELEMENTS(pChunk->aPages) || pChunk->iFreeHead == UINT16_MAX);
3653 pPage->Free.iNext = pChunk->iFreeHead;
3654 pChunk->iFreeHead = pPage - &pChunk->aPages[0];
3655
3656 /*
3657 * Update statistics (the cShared/cPrivate stats are up to date already),
3658 * and relink the chunk if necessary.
3659 */
3660 unsigned const cFree = pChunk->cFree;
3661 if ( !cFree
3662 || gmmR0SelectFreeSetList(cFree) != gmmR0SelectFreeSetList(cFree + 1))
3663 {
3664 gmmR0UnlinkChunk(pChunk);
3665 pChunk->cFree++;
3666 gmmR0SelectSetAndLinkChunk(pGMM, pGVM, pChunk);
3667 }
3668 else
3669 {
3670 pChunk->cFree = cFree + 1;
3671 pChunk->pSet->cFreePages++;
3672 }
3673
3674 /*
3675 * If the chunk becomes empty, consider giving memory back to the host OS.
3676 *
3677 * The current strategy is to try to give it back if there are other chunks
3678 * in this free list, meaning if there are at least 240 free pages in this
3679 * category. Note that since there are probably mappings of the chunk,
3680 * it won't be freed up instantly, which probably screws up this logic
3681 * a bit...
3682 */
3683 /** @todo Do this on the way out. */
3684 if (RT_LIKELY( pChunk->cFree != GMM_CHUNK_NUM_PAGES
3685 || pChunk->pFreeNext == NULL
3686 || pChunk->pFreePrev == NULL /** @todo this is probably misfiring, see reset... */))
3687 { /* likely */ }
3688 else
3689 gmmR0FreeChunk(pGMM, NULL, pChunk, false);
3690}
3691
3692
3693/**
3694 * Frees a shared page; the page is known to exist and be valid.
3695 *
3696 * @param pGMM Pointer to the GMM instance.
3697 * @param pGVM Pointer to the GVM instance.
3698 * @param idPage The page id.
3699 * @param pPage The page structure.
3700 */
3701DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage)
3702{
3703 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
3704 Assert(pChunk);
3705 Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
3706 Assert(pChunk->cShared > 0);
3707 Assert(pGMM->cSharedPages > 0);
3708 Assert(pGMM->cAllocatedPages > 0);
3709 Assert(!pPage->Shared.cRefs);
3710
3711 pChunk->cShared--;
3712 pGMM->cAllocatedPages--;
3713 pGMM->cSharedPages--;
3714 gmmR0FreePageWorker(pGMM, pGVM, pChunk, idPage, pPage);
3715}
3716
3717
3718/**
3719 * Frees a private page; the page is known to exist and be valid.
3720 *
3721 * @param pGMM Pointer to the GMM instance.
3722 * @param pGVM Pointer to the GVM instance.
3723 * @param idPage The page id.
3724 * @param pPage The page structure.
3725 */
3726DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage)
3727{
3728 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
3729 Assert(pChunk);
3730 Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
3731 Assert(pChunk->cPrivate > 0);
3732 Assert(pGMM->cAllocatedPages > 0);
3733
3734 pChunk->cPrivate--;
3735 pGMM->cAllocatedPages--;
3736 gmmR0FreePageWorker(pGMM, pGVM, pChunk, idPage, pPage);
3737}
3738
3739
3740/**
3741 * Common worker for GMMR0FreePages and GMMR0BalloonedPages.
3742 *
3743 * @returns VBox status code:
3744 * @retval xxx
3745 *
3746 * @param pGMM Pointer to the GMM instance data.
3747 * @param pGVM Pointer to the VM.
3748 * @param cPages The number of pages to free.
3749 * @param paPages Pointer to the page descriptors.
3750 * @param enmAccount The account this relates to.
3751 */
3752static int gmmR0FreePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
3753{
3754 /*
3755 * Check that the request isn't impossible wrt to the account status.
3756 */
3757 switch (enmAccount)
3758 {
3759 case GMMACCOUNT_BASE:
3760 if (RT_UNLIKELY(pGVM->gmm.s.Stats.Allocated.cBasePages < cPages))
3761 {
3762 Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Stats.Allocated.cBasePages, cPages));
3763 return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3764 }
3765 break;
3766 case GMMACCOUNT_SHADOW:
3767 if (RT_UNLIKELY(pGVM->gmm.s.Stats.Allocated.cShadowPages < cPages))
3768 {
3769 Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Stats.Allocated.cShadowPages, cPages));
3770 return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3771 }
3772 break;
3773 case GMMACCOUNT_FIXED:
3774 if (RT_UNLIKELY(pGVM->gmm.s.Stats.Allocated.cFixedPages < cPages))
3775 {
3776 Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Stats.Allocated.cFixedPages, cPages));
3777 return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3778 }
3779 break;
3780 default:
3781 AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_IPE_NOT_REACHED_DEFAULT_CASE);
3782 }
3783
3784 /*
3785 * Walk the descriptors and free the pages.
3786 *
3787 * Statistics (except the account) are being updated as we go along,
3788 * unlike the alloc code. Also, stop on the first error.
3789 */
3790 int rc = VINF_SUCCESS;
3791 uint32_t iPage;
3792 for (iPage = 0; iPage < cPages; iPage++)
3793 {
3794 uint32_t idPage = paPages[iPage].idPage;
3795 PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
3796 if (RT_LIKELY(pPage))
3797 {
3798 if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
3799 {
3800 if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
3801 {
3802 Assert(pGVM->gmm.s.Stats.cPrivatePages);
3803 pGVM->gmm.s.Stats.cPrivatePages--;
3804 gmmR0FreePrivatePage(pGMM, pGVM, idPage, pPage);
3805 }
3806 else
3807 {
3808 Log(("gmmR0FreePages: #%#x/%#x: not owner! hGVM=%#x hSelf=%#x\n", iPage, idPage,
3809 pPage->Private.hGVM, pGVM->hSelf));
3810 rc = VERR_GMM_NOT_PAGE_OWNER;
3811 break;
3812 }
3813 }
3814 else if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
3815 {
3816 Assert(pGVM->gmm.s.Stats.cSharedPages);
3817 Assert(pPage->Shared.cRefs);
3818#if defined(VBOX_WITH_PAGE_SHARING) && defined(VBOX_STRICT)
3819 if (pPage->Shared.u14Checksum)
3820 {
3821 uint32_t uChecksum = gmmR0StrictPageChecksum(pGMM, pGVM, idPage);
3822 uChecksum &= UINT32_C(0x00003fff);
3823 AssertMsg(!uChecksum || uChecksum == pPage->Shared.u14Checksum,
3824 ("%#x vs %#x - idPage=%#x\n", uChecksum, pPage->Shared.u14Checksum, idPage));
3825 }
3826#endif
3827 pGVM->gmm.s.Stats.cSharedPages--;
3828 if (!--pPage->Shared.cRefs)
3829 gmmR0FreeSharedPage(pGMM, pGVM, idPage, pPage);
3830 else
3831 {
3832 Assert(pGMM->cDuplicatePages);
3833 pGMM->cDuplicatePages--;
3834 }
3835 }
3836 else
3837 {
3838 Log(("gmmR0FreePages: #%#x/%#x: already free!\n", iPage, idPage));
3839 rc = VERR_GMM_PAGE_ALREADY_FREE;
3840 break;
3841 }
3842 }
3843 else
3844 {
3845 Log(("gmmR0FreePages: #%#x/%#x: not found!\n", iPage, idPage));
3846 rc = VERR_GMM_PAGE_NOT_FOUND;
3847 break;
3848 }
3849 paPages[iPage].idPage = NIL_GMM_PAGEID;
3850 }
3851
3852 /*
3853 * Update the account.
3854 */
3855 switch (enmAccount)
3856 {
3857 case GMMACCOUNT_BASE: pGVM->gmm.s.Stats.Allocated.cBasePages -= iPage; break;
3858 case GMMACCOUNT_SHADOW: pGVM->gmm.s.Stats.Allocated.cShadowPages -= iPage; break;
3859 case GMMACCOUNT_FIXED: pGVM->gmm.s.Stats.Allocated.cFixedPages -= iPage; break;
3860 default:
3861 AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_IPE_NOT_REACHED_DEFAULT_CASE);
3862 }
3863
3864 /*
3865 * Any threshold stuff to be done here?
3866 */
3867
3868 return rc;
3869}
3870
3871
3872/**
3873 * Free one or more pages.
3874 *
3875 * This is typically used at reset time or power off.
3876 *
3877 * @returns VBox status code:
3878 * @retval xxx
3879 *
3880 * @param pGVM The global (ring-0) VM structure.
3881 * @param idCpu The VCPU id.
3882 * @param cPages The number of pages to free.
3883 * @param paPages Pointer to the page descriptors containing the page IDs
3884 * for each page.
3885 * @param enmAccount The account this relates to.
3886 * @thread EMT.
3887 */
3888GMMR0DECL(int) GMMR0FreePages(PGVM pGVM, VMCPUID idCpu, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
3889{
3890 LogFlow(("GMMR0FreePages: pGVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pGVM, cPages, paPages, enmAccount));
3891
3892 /*
3893 * Validate input and get the basics.
3894 */
3895 PGMM pGMM;
3896 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
3897 int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
3898 if (RT_FAILURE(rc))
3899 return rc;
3900
3901 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
3902 AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
3903 AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - GUEST_PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
3904
3905 for (unsigned iPage = 0; iPage < cPages; iPage++)
3906 AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
3907 /*|| paPages[iPage].idPage == NIL_GMM_PAGEID*/,
3908 ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
3909
3910 /*
3911 * Take the semaphore and call the worker function.
3912 */
3913 gmmR0MutexAcquire(pGMM);
3914 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3915 {
3916 rc = gmmR0FreePages(pGMM, pGVM, cPages, paPages, enmAccount);
3917 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3918 }
3919 else
3920 rc = VERR_GMM_IS_NOT_SANE;
3921 gmmR0MutexRelease(pGMM);
3922 LogFlow(("GMMR0FreePages: returns %Rrc\n", rc));
3923 return rc;
3924}
3925
3926
3927/**
3928 * VMMR0 request wrapper for GMMR0FreePages.
3929 *
3930 * @returns see GMMR0FreePages.
3931 * @param pGVM The global (ring-0) VM structure.
3932 * @param idCpu The VCPU id.
3933 * @param pReq Pointer to the request packet.
3934 */
3935GMMR0DECL(int) GMMR0FreePagesReq(PGVM pGVM, VMCPUID idCpu, PGMMFREEPAGESREQ pReq)
3936{
3937 /*
3938 * Validate input and pass it on.
3939 */
3940 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3941 AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0]),
3942 ("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0])),
3943 VERR_INVALID_PARAMETER);
3944 AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF_DYN(GMMFREEPAGESREQ, aPages[pReq->cPages]),
3945 ("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF_DYN(GMMFREEPAGESREQ, aPages[pReq->cPages])),
3946 VERR_INVALID_PARAMETER);
3947
3948 return GMMR0FreePages(pGVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
3949}
3950
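/*
 * Caller-side sketch mirroring the cbReq checks above (cPagesToFree and
 * aPageIds are placeholder names; request header magic/initialization done
 * by the normal VMM request path is omitted):
 * @code
 *      uint32_t const cbReq = RT_UOFFSETOF_DYN(GMMFREEPAGESREQ, aPages[cPagesToFree]);
 *      PGMMFREEPAGESREQ pReq = (PGMMFREEPAGESREQ)RTMemAllocZ(cbReq);
 *      if (pReq)
 *      {
 *          pReq->Hdr.cbReq  = cbReq;
 *          pReq->cPages     = cPagesToFree;
 *          pReq->enmAccount = GMMACCOUNT_BASE;
 *          for (uint32_t i = 0; i < cPagesToFree; i++)
 *              pReq->aPages[i].idPage = aPageIds[i];   // IDs handed out by an earlier allocation
 *          int rc = GMMR0FreePagesReq(pGVM, idCpu, pReq);
 *          // on success each descriptor's idPage is reset to NIL_GMM_PAGEID
 *          RTMemFree(pReq);
 *      }
 * @endcode
 */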
3951
3952/**
3953 * Report back on a memory ballooning request.
3954 *
3955 * The request may or may not have been initiated by the GMM. If it was initiated
3956 * by the GMM it is important that this function is called even if no pages were
3957 * ballooned.
3958 *
3959 * @returns VBox status code:
3960 * @retval VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH
3961 * @retval VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH
3962 * @retval VERR_GMM_OVERCOMMITTED_TRY_AGAIN_IN_A_BIT - reset condition
3963 * indicating that we won't necessarily have sufficient RAM to boot
3964 * the VM again and that it should pause until this changes (we'll try
3965 * balloon some other VM). (For standard deflate we have little choice
3966 * but to hope the VM won't use the memory that was returned to it.)
3967 *
3968 * @param pGVM The global (ring-0) VM structure.
3969 * @param idCpu The VCPU id.
3970 * @param enmAction Inflate/deflate/reset.
3971 * @param cBalloonedPages The number of pages that was ballooned.
3972 *
3973 * @thread EMT(idCpu)
3974 */
3975GMMR0DECL(int) GMMR0BalloonedPages(PGVM pGVM, VMCPUID idCpu, GMMBALLOONACTION enmAction, uint32_t cBalloonedPages)
3976{
3977 LogFlow(("GMMR0BalloonedPages: pGVM=%p enmAction=%d cBalloonedPages=%#x\n",
3978 pGVM, enmAction, cBalloonedPages));
3979
3980 AssertMsgReturn(cBalloonedPages < RT_BIT(32 - GUEST_PAGE_SHIFT), ("%#x\n", cBalloonedPages), VERR_INVALID_PARAMETER);
3981
3982 /*
3983 * Validate input and get the basics.
3984 */
3985 PGMM pGMM;
3986 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
3987 int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
3988 if (RT_FAILURE(rc))
3989 return rc;
3990
3991 /*
3992 * Take the semaphore and do some more validations.
3993 */
3994 gmmR0MutexAcquire(pGMM);
3995 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3996 {
3997 switch (enmAction)
3998 {
3999 case GMMBALLOONACTION_INFLATE:
4000 {
4001 if (RT_LIKELY(pGVM->gmm.s.Stats.Allocated.cBasePages + pGVM->gmm.s.Stats.cBalloonedPages + cBalloonedPages
4002 <= pGVM->gmm.s.Stats.Reserved.cBasePages))
4003 {
4004 /*
4005 * Record the ballooned memory.
4006 */
4007 pGMM->cBalloonedPages += cBalloonedPages;
4008 if (pGVM->gmm.s.Stats.cReqBalloonedPages)
4009 {
4010 /* Codepath never taken. Might be interesting in the future to request ballooned memory from guests in low memory conditions... */
4011 AssertFailed();
4012
4013 pGVM->gmm.s.Stats.cBalloonedPages += cBalloonedPages;
4014 pGVM->gmm.s.Stats.cReqActuallyBalloonedPages += cBalloonedPages;
4015 Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx Req=%#llx Actual=%#llx (pending)\n",
4016 cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.Stats.cBalloonedPages,
4017 pGVM->gmm.s.Stats.cReqBalloonedPages, pGVM->gmm.s.Stats.cReqActuallyBalloonedPages));
4018 }
4019 else
4020 {
4021 pGVM->gmm.s.Stats.cBalloonedPages += cBalloonedPages;
4022 Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx (user)\n",
4023 cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.Stats.cBalloonedPages));
4024 }
4025 }
4026 else
4027 {
4028 Log(("GMMR0BalloonedPages: cBasePages=%#llx Total=%#llx cBalloonedPages=%#llx Reserved=%#llx\n",
4029 pGVM->gmm.s.Stats.Allocated.cBasePages, pGVM->gmm.s.Stats.cBalloonedPages, cBalloonedPages,
4030 pGVM->gmm.s.Stats.Reserved.cBasePages));
4031 rc = VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
4032 }
4033 break;
4034 }
4035
4036 case GMMBALLOONACTION_DEFLATE:
4037 {
4038 /* Deflate. */
4039 if (pGVM->gmm.s.Stats.cBalloonedPages >= cBalloonedPages)
4040 {
4041 /*
4042 * Record the ballooned memory.
4043 */
4044 Assert(pGMM->cBalloonedPages >= cBalloonedPages);
4045 pGMM->cBalloonedPages -= cBalloonedPages;
4046 pGVM->gmm.s.Stats.cBalloonedPages -= cBalloonedPages;
4047 if (pGVM->gmm.s.Stats.cReqDeflatePages)
4048 {
4049 AssertFailed(); /* This path is for later. */
4050 Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx Req=%#llx\n",
4051 cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.Stats.cBalloonedPages, pGVM->gmm.s.Stats.cReqDeflatePages));
4052
4053 /*
4054 * Anything we need to do here now when the request has been completed?
4055 */
4056 pGVM->gmm.s.Stats.cReqDeflatePages = 0;
4057 }
4058 else
4059 Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx (user)\n",
4060 cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.Stats.cBalloonedPages));
4061 }
4062 else
4063 {
4064 Log(("GMMR0BalloonedPages: Total=%#llx cBalloonedPages=%#llx\n", pGVM->gmm.s.Stats.cBalloonedPages, cBalloonedPages));
4065 rc = VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH;
4066 }
4067 break;
4068 }
4069
4070 case GMMBALLOONACTION_RESET:
4071 {
4072 /* Reset to an empty balloon. */
4073 Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.Stats.cBalloonedPages);
4074
4075 pGMM->cBalloonedPages -= pGVM->gmm.s.Stats.cBalloonedPages;
4076 pGVM->gmm.s.Stats.cBalloonedPages = 0;
4077 break;
4078 }
4079
4080 default:
4081 rc = VERR_INVALID_PARAMETER;
4082 break;
4083 }
4084 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4085 }
4086 else
4087 rc = VERR_GMM_IS_NOT_SANE;
4088
4089 gmmR0MutexRelease(pGMM);
4090 LogFlow(("GMMR0BalloonedPages: returns %Rrc\n", rc));
4091 return rc;
4092}
4093
4094
4095/**
4096 * VMMR0 request wrapper for GMMR0BalloonedPages.
4097 *
4098 * @returns see GMMR0BalloonedPages.
4099 * @param pGVM The global (ring-0) VM structure.
4100 * @param idCpu The VCPU id.
4101 * @param pReq Pointer to the request packet.
4102 */
4103GMMR0DECL(int) GMMR0BalloonedPagesReq(PGVM pGVM, VMCPUID idCpu, PGMMBALLOONEDPAGESREQ pReq)
4104{
4105 /*
4106 * Validate input and pass it on.
4107 */
4108 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4109 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMBALLOONEDPAGESREQ),
4110 ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMBALLOONEDPAGESREQ)),
4111 VERR_INVALID_PARAMETER);
4112
4113 return GMMR0BalloonedPages(pGVM, idCpu, pReq->enmAction, pReq->cBalloonedPages);
4114}
4115
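/*
 * Illustrative accounting example for the inflate check in GMMR0BalloonedPages
 * above (the numbers are made up and assume 4 KiB guest pages):
 * @code
 *      // Reserved.cBasePages    = 0x40000  (1 GiB of guest RAM reserved)
 *      // Allocated.cBasePages   = 0x30000  (768 MiB currently allocated)
 *      // cBalloonedPages so far = 0x08000  (128 MiB already in the balloon)
 *      //
 *      // A further inflate of up to 0x08000 pages keeps
 *      //    Allocated.cBasePages + cBalloonedPages + cNewBalloonedPages <= Reserved.cBasePages
 *      // true; anything larger fails with VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH.
 * @endcode
 */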
4116
4117/**
4118 * Return memory statistics for the hypervisor
4119 *
4120 * @returns VBox status code.
4121 * @param pReq Pointer to the request packet.
4122 */
4123GMMR0DECL(int) GMMR0QueryHypervisorMemoryStatsReq(PGMMMEMSTATSREQ pReq)
4124{
4125 /*
4126 * Validate input and pass it on.
4127 */
4128 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4129 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
4130 ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
4131 VERR_INVALID_PARAMETER);
4132
4133 /*
4134 * Validate input and get the basics.
4135 */
4136 PGMM pGMM;
4137 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
4138 pReq->cAllocPages = pGMM->cAllocatedPages;
4139 pReq->cFreePages = (pGMM->cChunks << (GMM_CHUNK_SHIFT - GUEST_PAGE_SHIFT)) - pGMM->cAllocatedPages;
4140 pReq->cBalloonedPages = pGMM->cBalloonedPages;
4141 pReq->cMaxPages = pGMM->cMaxPages;
4142 pReq->cSharedPages = pGMM->cDuplicatePages;
4143 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4144
4145 return VINF_SUCCESS;
4146}
4147
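/*
 * Worked example for the cFreePages computation above, assuming 2 MiB chunks
 * and 4 KiB guest pages so that (GMM_CHUNK_SHIFT - GUEST_PAGE_SHIFT) == 9,
 * i.e. 512 pages per chunk:
 * @code
 *      // cChunks = 1000, cAllocatedPages = 200000
 *      // cFreePages = (1000 << 9) - 200000 = 512000 - 200000 = 312000
 * @endcode
 */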
4148
4149/**
4150 * Return memory statistics for the VM
4151 *
4152 * @returns VBox status code.
4153 * @param pGVM The global (ring-0) VM structure.
4154 * @param idCpu Cpu id.
4155 * @param pReq Pointer to the request packet.
4156 *
4157 * @thread EMT(idCpu)
4158 */
4159GMMR0DECL(int) GMMR0QueryMemoryStatsReq(PGVM pGVM, VMCPUID idCpu, PGMMMEMSTATSREQ pReq)
4160{
4161 /*
4162 * Validate input and pass it on.
4163 */
4164 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4165 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
4166 ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
4167 VERR_INVALID_PARAMETER);
4168
4169 /*
4170 * Validate input and get the basics.
4171 */
4172 PGMM pGMM;
4173 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
4174 int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
4175 if (RT_FAILURE(rc))
4176 return rc;
4177
4178 /*
4179 * Take the semaphore and do some more validations.
4180 */
4181 gmmR0MutexAcquire(pGMM);
4182 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4183 {
4184 pReq->cAllocPages = pGVM->gmm.s.Stats.Allocated.cBasePages;
4185 pReq->cBalloonedPages = pGVM->gmm.s.Stats.cBalloonedPages;
4186 pReq->cMaxPages = pGVM->gmm.s.Stats.Reserved.cBasePages;
4187 pReq->cFreePages = pReq->cMaxPages - pReq->cAllocPages;
4188 }
4189 else
4190 rc = VERR_GMM_IS_NOT_SANE;
4191
4192 gmmR0MutexRelease(pGMM);
4193 LogFlow(("GMMR0QueryMemoryStatsReq: returns %Rrc\n", rc));
4194 return rc;
4195}
4196
4197
4198/**
4199 * Worker for gmmR0UnmapChunk and gmmR0FreeChunk.
4200 *
4201 * Don't call this in legacy allocation mode!
4202 *
4203 * @returns VBox status code.
4204 * @param pGMM Pointer to the GMM instance data.
4205 * @param pGVM Pointer to the Global VM structure.
4206 * @param pChunk Pointer to the chunk to be unmapped.
4207 */
4208static int gmmR0UnmapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
4209{
4210 RT_NOREF_PV(pGMM);
4211
4212 /*
4213 * Find the mapping and try unmapping it.
4214 */
4215 uint32_t cMappings = pChunk->cMappingsX;
4216 for (uint32_t i = 0; i < cMappings; i++)
4217 {
4218 Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
4219 if (pChunk->paMappingsX[i].pGVM == pGVM)
4220 {
4221 /* unmap */
4222 int rc = RTR0MemObjFree(pChunk->paMappingsX[i].hMapObj, false /* fFreeMappings (NA) */);
4223 if (RT_SUCCESS(rc))
4224 {
4225 /* update the record. */
4226 cMappings--;
4227 if (i < cMappings)
4228 pChunk->paMappingsX[i] = pChunk->paMappingsX[cMappings];
4229 pChunk->paMappingsX[cMappings].hMapObj = NIL_RTR0MEMOBJ;
4230 pChunk->paMappingsX[cMappings].pGVM = NULL;
4231 Assert(pChunk->cMappingsX - 1U == cMappings);
4232 pChunk->cMappingsX = cMappings;
4233 }
4234
4235 return rc;
4236 }
4237 }
4238
4239 Log(("gmmR0UnmapChunk: Chunk %#x is not mapped into pGVM=%p/%#x\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
4240 return VERR_GMM_CHUNK_NOT_MAPPED;
4241}
4242
4243
4244/**
4245 * Unmaps a chunk previously mapped into the address space of the current process.
4246 *
4247 * @returns VBox status code.
4248 * @param pGMM Pointer to the GMM instance data.
4249 * @param pGVM Pointer to the Global VM structure.
4250 * @param pChunk Pointer to the chunk to be unmapped.
4251 * @param fRelaxedSem Whether we can release the semaphore while doing the
4252 * mapping (@c true) or not.
4253 */
4254static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem)
4255{
4256 /*
4257 * Lock the chunk and if possible leave the giant GMM lock.
4258 */
4259 GMMR0CHUNKMTXSTATE MtxState;
4260 int rc = gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk,
4261 fRelaxedSem ? GMMR0CHUNK_MTX_RETAKE_GIANT : GMMR0CHUNK_MTX_KEEP_GIANT);
4262 if (RT_SUCCESS(rc))
4263 {
4264 rc = gmmR0UnmapChunkLocked(pGMM, pGVM, pChunk);
4265 gmmR0ChunkMutexRelease(&MtxState, pChunk);
4266 }
4267 return rc;
4268}
4269
4270
4271/**
4272 * Worker for gmmR0MapChunk.
4273 *
4274 * @returns VBox status code.
4275 * @param pGMM Pointer to the GMM instance data.
4276 * @param pGVM Pointer to the Global VM structure.
4277 * @param pChunk Pointer to the chunk to be mapped.
4278 * @param ppvR3 Where to store the ring-3 address of the mapping.
4279 * In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will
4280 * contain the address of the existing mapping.
4281 */
4282static int gmmR0MapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
4283{
4284 RT_NOREF(pGMM);
4285
4286 /*
4287 * Check to see if the chunk is already mapped.
4288 */
4289 for (uint32_t i = 0; i < pChunk->cMappingsX; i++)
4290 {
4291 Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
4292 if (pChunk->paMappingsX[i].pGVM == pGVM)
4293 {
4294 *ppvR3 = RTR0MemObjAddressR3(pChunk->paMappingsX[i].hMapObj);
4295 Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
4296#ifdef VBOX_WITH_PAGE_SHARING
4297 /* The ring-3 chunk cache can be out of sync; don't fail. */
4298 return VINF_SUCCESS;
4299#else
4300 return VERR_GMM_CHUNK_ALREADY_MAPPED;
4301#endif
4302 }
4303 }
4304
4305 /*
4306 * Do the mapping.
4307 */
4308 RTR0MEMOBJ hMapObj;
4309 int rc = RTR0MemObjMapUser(&hMapObj, pChunk->hMemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
4310 if (RT_SUCCESS(rc))
4311 {
4312 /* reallocate the array? assumes few users per chunk (usually one). */
4313 unsigned iMapping = pChunk->cMappingsX;
4314 if ( iMapping <= 3
4315 || (iMapping & 3) == 0)
4316 {
4317 unsigned cNewSize = iMapping <= 3
4318 ? iMapping + 1
4319 : iMapping + 4;
4320 Assert(cNewSize < 4 || RT_ALIGN_32(cNewSize, 4) == cNewSize);
4321 if (RT_UNLIKELY(cNewSize > UINT16_MAX))
4322 {
4323 rc = RTR0MemObjFree(hMapObj, false /* fFreeMappings (NA) */); AssertRC(rc);
4324 return VERR_GMM_TOO_MANY_CHUNK_MAPPINGS;
4325 }
4326
4327 void *pvMappings = RTMemRealloc(pChunk->paMappingsX, cNewSize * sizeof(pChunk->paMappingsX[0]));
4328 if (RT_UNLIKELY(!pvMappings))
4329 {
4330 rc = RTR0MemObjFree(hMapObj, false /* fFreeMappings (NA) */); AssertRC(rc);
4331 return VERR_NO_MEMORY;
4332 }
4333 pChunk->paMappingsX = (PGMMCHUNKMAP)pvMappings;
4334 }
4335
4336 /* insert new entry */
4337 pChunk->paMappingsX[iMapping].hMapObj = hMapObj;
4338 pChunk->paMappingsX[iMapping].pGVM = pGVM;
4339 Assert(pChunk->cMappingsX == iMapping);
4340 pChunk->cMappingsX = iMapping + 1;
4341
4342 *ppvR3 = RTR0MemObjAddressR3(hMapObj);
4343 }
4344
4345 return rc;
4346}
4347
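/*
 * Note on the reallocation scheme in gmmR0MapChunkLocked above: the mapping
 * array grows by one entry for the first four mappings and by four entries
 * thereafter, keeping the common single-mapping case cheap. Illustration:
 * @code
 *      // iMapping (count before insert)   realloc?   new capacity
 *      //        0                           yes          1
 *      //        1                           yes          2
 *      //        2                           yes          3
 *      //        3                           yes          4
 *      //        4    ((4 & 3) == 0)         yes          8
 *      //        5..7                        no           -
 *      //        8    ((8 & 3) == 0)         yes         12
 * @endcode
 */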
4348
4349/**
4350 * Maps a chunk into the user address space of the current process.
4351 *
4352 * @returns VBox status code.
4353 * @param pGMM Pointer to the GMM instance data.
4354 * @param pGVM Pointer to the Global VM structure.
4355 * @param pChunk Pointer to the chunk to be mapped.
4356 * @param fRelaxedSem Whether we can release the semaphore while doing the
4357 * mapping (@c true) or not.
4358 * @param ppvR3 Where to store the ring-3 address of the mapping.
4359 * In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will
4360 * contain the address of the existing mapping.
4361 */
4362static int gmmR0MapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem, PRTR3PTR ppvR3)
4363{
4364 /*
4365 * Take the chunk lock and leave the giant GMM lock when possible, then
4366 * call the worker function.
4367 */
4368 GMMR0CHUNKMTXSTATE MtxState;
4369 int rc = gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk,
4370 fRelaxedSem ? GMMR0CHUNK_MTX_RETAKE_GIANT : GMMR0CHUNK_MTX_KEEP_GIANT);
4371 if (RT_SUCCESS(rc))
4372 {
4373 rc = gmmR0MapChunkLocked(pGMM, pGVM, pChunk, ppvR3);
4374 gmmR0ChunkMutexRelease(&MtxState, pChunk);
4375 }
4376
4377 return rc;
4378}
4379
4380
4381
4382#if defined(VBOX_WITH_PAGE_SHARING) || defined(VBOX_STRICT)
4383/**
4384 * Check if a chunk is mapped into the specified VM
4385 *
4386 * @returns mapped yes/no
4387 * @param pGMM Pointer to the GMM instance.
4388 * @param pGVM Pointer to the Global VM structure.
4389 * @param pChunk Pointer to the chunk to be mapped.
4390 * @param ppvR3 Where to store the ring-3 address of the mapping.
4391 */
4392static bool gmmR0IsChunkMapped(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
4393{
4394 GMMR0CHUNKMTXSTATE MtxState;
4395 gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
4396 for (uint32_t i = 0; i < pChunk->cMappingsX; i++)
4397 {
4398 Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
4399 if (pChunk->paMappingsX[i].pGVM == pGVM)
4400 {
4401 *ppvR3 = RTR0MemObjAddressR3(pChunk->paMappingsX[i].hMapObj);
4402 gmmR0ChunkMutexRelease(&MtxState, pChunk);
4403 return true;
4404 }
4405 }
4406 *ppvR3 = NULL;
4407 gmmR0ChunkMutexRelease(&MtxState, pChunk);
4408 return false;
4409}
4410#endif /* VBOX_WITH_PAGE_SHARING || VBOX_STRICT */
4411
4412
4413/**
4414 * Map a chunk and/or unmap another chunk.
4415 *
4416 * The mapping and unmapping applies to the current process.
4417 *
4418 * This API does two things because it saves a kernel call per mapping
4419 * when the ring-3 mapping cache is full.
4420 *
4421 * @returns VBox status code.
4422 * @param pGVM The global (ring-0) VM structure.
4423 * @param idChunkMap The chunk to map. NIL_GMM_CHUNKID if nothing to map.
4424 * @param idChunkUnmap The chunk to unmap. NIL_GMM_CHUNKID if nothing to unmap.
4425 * @param ppvR3 Where to store the address of the mapped chunk. NULL is ok if nothing to map.
4426 * @thread EMT ???
4427 */
4428GMMR0DECL(int) GMMR0MapUnmapChunk(PGVM pGVM, uint32_t idChunkMap, uint32_t idChunkUnmap, PRTR3PTR ppvR3)
4429{
4430 LogFlow(("GMMR0MapUnmapChunk: pGVM=%p idChunkMap=%#x idChunkUnmap=%#x ppvR3=%p\n",
4431 pGVM, idChunkMap, idChunkUnmap, ppvR3));
4432
4433 /*
4434 * Validate input and get the basics.
4435 */
4436 PGMM pGMM;
4437 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
4438 int rc = GVMMR0ValidateGVM(pGVM);
4439 if (RT_FAILURE(rc))
4440 return rc;
4441
4442 AssertCompile(NIL_GMM_CHUNKID == 0);
4443 AssertMsgReturn(idChunkMap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkMap), VERR_INVALID_PARAMETER);
4444 AssertMsgReturn(idChunkUnmap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkUnmap), VERR_INVALID_PARAMETER);
4445
4446 if ( idChunkMap == NIL_GMM_CHUNKID
4447 && idChunkUnmap == NIL_GMM_CHUNKID)
4448 return VERR_INVALID_PARAMETER;
4449
4450 if (idChunkMap != NIL_GMM_CHUNKID)
4451 {
4452 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
4453 *ppvR3 = NIL_RTR3PTR;
4454 }
4455
4456 /*
4457 * Take the semaphore and do the work.
4458 *
4459 * The unmapping is done last since it's easier to undo a mapping than
4460 * to undo an unmapping. The ring-3 mapping cache cannot be so big
4461 * that it pushes the user virtual address space to within a chunk of
4462 * its limits, so no problem here.
4463 */
4464 gmmR0MutexAcquire(pGMM);
4465 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4466 {
4467 PGMMCHUNK pMap = NULL;
4468 if (idChunkMap != NIL_GMM_CHUNKID)
4469 {
4470 pMap = gmmR0GetChunk(pGMM, idChunkMap);
4471 if (RT_LIKELY(pMap))
4472 rc = gmmR0MapChunk(pGMM, pGVM, pMap, true /*fRelaxedSem*/, ppvR3);
4473 else
4474 {
4475 Log(("GMMR0MapUnmapChunk: idChunkMap=%#x\n", idChunkMap));
4476 rc = VERR_GMM_CHUNK_NOT_FOUND;
4477 }
4478 }
4479/** @todo split this operation, the bail out might (theoretically) not be
4480 * entirely safe. */
4481
4482 if ( idChunkUnmap != NIL_GMM_CHUNKID
4483 && RT_SUCCESS(rc))
4484 {
4485 PGMMCHUNK pUnmap = gmmR0GetChunk(pGMM, idChunkUnmap);
4486 if (RT_LIKELY(pUnmap))
4487 rc = gmmR0UnmapChunk(pGMM, pGVM, pUnmap, true /*fRelaxedSem*/);
4488 else
4489 {
4490 Log(("GMMR0MapUnmapChunk: idChunkUnmap=%#x\n", idChunkUnmap));
4491 rc = VERR_GMM_CHUNK_NOT_FOUND;
4492 }
4493
4494 if (RT_FAILURE(rc) && pMap)
4495 gmmR0UnmapChunk(pGMM, pGVM, pMap, false /*fRelaxedSem*/);
4496 }
4497
4498 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4499 }
4500 else
4501 rc = VERR_GMM_IS_NOT_SANE;
4502 gmmR0MutexRelease(pGMM);
4503
4504 LogFlow(("GMMR0MapUnmapChunk: returns %Rrc\n", rc));
4505 return rc;
4506}
4507
4508
4509/**
4510 * VMMR0 request wrapper for GMMR0MapUnmapChunk.
4511 *
4512 * @returns see GMMR0MapUnmapChunk.
4513 * @param pGVM The global (ring-0) VM structure.
4514 * @param pReq Pointer to the request packet.
4515 */
4516GMMR0DECL(int) GMMR0MapUnmapChunkReq(PGVM pGVM, PGMMMAPUNMAPCHUNKREQ pReq)
4517{
4518 /*
4519 * Validate input and pass it on.
4520 */
4521 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4522 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
4523
4524 return GMMR0MapUnmapChunk(pGVM, pReq->idChunkMap, pReq->idChunkUnmap, &pReq->pvR3);
4525}
4526
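/*
 * Illustrative request usage for the combined map/unmap operation above
 * (idNewChunk and idOldChunk are placeholders; either may be NIL_GMM_CHUNKID,
 * but not both, and header magic/initialization by the normal VMM request
 * path is omitted):
 * @code
 *      GMMMAPUNMAPCHUNKREQ Req;
 *      Req.Hdr.cbReq    = sizeof(Req);
 *      Req.idChunkMap   = idNewChunk;      // chunk to map into this process
 *      Req.idChunkUnmap = idOldChunk;      // chunk to evict from the ring-3 mapping cache
 *      Req.pvR3         = NIL_RTR3PTR;
 *      int rc = GMMR0MapUnmapChunkReq(pGVM, &Req);
 *      // on success Req.pvR3 holds the ring-3 address of the newly mapped chunk
 * @endcode
 */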
4527
4528#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
4529/**
4530 * Gets the ring-0 virtual address for the given page.
4531 *
4532 * This is used by PGM when IEM and such wants to access guest RAM from ring-0.
4533 * One of the ASSUMPTIONS here is that the @a idPage is used by the VM and the
4534 * corresponding chunk will remain valid beyond the call (at least till the EMT
4535 * returns to ring-3).
4536 *
4537 * @returns VBox status code.
4538 * @param pGVM Pointer to the kernel-only VM instance data.
4539 * @param idPage The page ID.
4540 * @param ppv Where to store the address.
4541 * @thread EMT
4542 */
4543GMMR0DECL(int) GMMR0PageIdToVirt(PGVM pGVM, uint32_t idPage, void **ppv)
4544{
4545 *ppv = NULL;
4546 PGMM pGMM;
4547 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
4548
4549 uint32_t const idChunk = idPage >> GMM_CHUNKID_SHIFT;
4550
4551 /*
4552 * Start with the per-VM TLB.
4553 */
4554 RTSpinlockAcquire(pGVM->gmm.s.hChunkTlbSpinLock);
4555
4556 PGMMPERVMCHUNKTLBE pTlbe = &pGVM->gmm.s.aChunkTlbEntries[GMMPERVM_CHUNKTLB_IDX(idChunk)];
4557 PGMMCHUNK pChunk = pTlbe->pChunk;
4558 if ( pChunk != NULL
4559 && pTlbe->idGeneration == ASMAtomicUoReadU64(&pGMM->idFreeGeneration)
4560 && pChunk->Core.Key == idChunk)
4561 pGVM->R0Stats.gmm.cChunkTlbHits++; /* hopefully this is a likely outcome */
4562 else
4563 {
4564 pGVM->R0Stats.gmm.cChunkTlbMisses++;
4565
4566 /*
4567 * Look it up in the chunk tree.
4568 */
4569 RTSpinlockAcquire(pGMM->hSpinLockTree);
4570 pChunk = gmmR0GetChunkLocked(pGMM, idChunk);
4571 if (RT_LIKELY(pChunk))
4572 {
4573 pTlbe->idGeneration = pGMM->idFreeGeneration;
4574 RTSpinlockRelease(pGMM->hSpinLockTree);
4575 pTlbe->pChunk = pChunk;
4576 }
4577 else
4578 {
4579 RTSpinlockRelease(pGMM->hSpinLockTree);
4580 RTSpinlockRelease(pGVM->gmm.s.hChunkTlbSpinLock);
4581 AssertMsgFailed(("idPage=%#x\n", idPage));
4582 return VERR_GMM_PAGE_NOT_FOUND;
4583 }
4584 }
4585
4586 RTSpinlockRelease(pGVM->gmm.s.hChunkTlbSpinLock);
4587
4588 /*
4589 * Got a chunk, now validate the page ownership and calculate its address.
4590 */
4591 const GMMPAGE * const pPage = &pChunk->aPages[idPage & GMM_PAGEID_IDX_MASK];
4592 if (RT_LIKELY( ( GMM_PAGE_IS_PRIVATE(pPage)
4593 && pPage->Private.hGVM == pGVM->hSelf)
4594 || GMM_PAGE_IS_SHARED(pPage)))
4595 {
4596 AssertPtr(pChunk->pbMapping);
4597 *ppv = &pChunk->pbMapping[(idPage & GMM_PAGEID_IDX_MASK) << GUEST_PAGE_SHIFT];
4598 return VINF_SUCCESS;
4599 }
4600 AssertMsgFailed(("idPage=%#x is-private=%RTbool Private.hGVM=%u pGVM->hGVM=%u\n",
4601 idPage, GMM_PAGE_IS_PRIVATE(pPage), pPage->Private.hGVM, pGVM->hSelf));
4602 return VERR_GMM_NOT_PAGE_OWNER;
4603}
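/*
 * Sketch of the page ID decomposition GMMR0PageIdToVirt relies on above
 * (purely illustrative, using only the macros and members already referenced
 * in that function):
 * @code
 *      uint32_t const idChunk = idPage >> GMM_CHUNKID_SHIFT;      // which chunk the page lives in
 *      uint32_t const iPage   = idPage &  GMM_PAGEID_IDX_MASK;    // index of the page within the chunk
 *      uint8_t       *pbPage  = &pChunk->pbMapping[(size_t)iPage << GUEST_PAGE_SHIFT];
 * @endcode
 */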
4604#endif /* !VBOX_WITH_LINEAR_HOST_PHYS_MEM */
4605
4606#ifdef VBOX_WITH_PAGE_SHARING
4607
4608# ifdef VBOX_STRICT
4609/**
4610 * For checksumming shared pages in strict builds.
4611 *
4612 * The purpose is making sure that a page doesn't change.
4613 *
4614 * @returns Checksum, 0 on failure.
4615 * @param pGMM The GMM instance data.
4616 * @param pGVM Pointer to the kernel-only VM instance data.
4617 * @param idPage The page ID.
4618 */
4619static uint32_t gmmR0StrictPageChecksum(PGMM pGMM, PGVM pGVM, uint32_t idPage)
4620{
4621 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
4622 AssertMsgReturn(pChunk, ("idPage=%#x\n", idPage), 0);
4623
4624 uint8_t *pbChunk;
4625 if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
4626 return 0;
4627 uint8_t const *pbPage = pbChunk + ((idPage & GMM_PAGEID_IDX_MASK) << GUEST_PAGE_SHIFT);
4628
4629 return RTCrc32(pbPage, GUEST_PAGE_SIZE);
4630}
4631# endif /* VBOX_STRICT */
4632
4633
4634/**
4635 * Calculates the module hash value.
4636 *
4637 * @returns Hash value.
4638 * @param pszModuleName The module name.
4639 * @param pszVersion The module version string.
4640 */
4641static uint32_t gmmR0ShModCalcHash(const char *pszModuleName, const char *pszVersion)
4642{
4643 return RTStrHash1ExN(3, pszModuleName, RTSTR_MAX, "::", (size_t)2, pszVersion, RTSTR_MAX);
4644}
4645
4646
4647/**
4648 * Finds a global module.
4649 *
4650 * @returns Pointer to the global module on success, NULL if not found.
4651 * @param pGMM The GMM instance data.
4652 * @param uHash The hash as calculated by gmmR0ShModCalcHash.
4653 * @param cbModule The module size.
4654 * @param enmGuestOS The guest OS type.
4655 * @param cRegions The number of regions.
4656 * @param pszModuleName The module name.
4657 * @param pszVersion The module version.
4658 * @param paRegions The region descriptions.
4659 */
4660static PGMMSHAREDMODULE gmmR0ShModFindGlobal(PGMM pGMM, uint32_t uHash, uint32_t cbModule, VBOXOSFAMILY enmGuestOS,
4661 uint32_t cRegions, const char *pszModuleName, const char *pszVersion,
4662 struct VMMDEVSHAREDREGIONDESC const *paRegions)
4663{
4664 for (PGMMSHAREDMODULE pGblMod = (PGMMSHAREDMODULE)RTAvllU32Get(&pGMM->pGlobalSharedModuleTree, uHash);
4665 pGblMod;
4666 pGblMod = (PGMMSHAREDMODULE)pGblMod->Core.pList)
4667 {
4668 if (pGblMod->cbModule != cbModule)
4669 continue;
4670 if (pGblMod->enmGuestOS != enmGuestOS)
4671 continue;
4672 if (pGblMod->cRegions != cRegions)
4673 continue;
4674 if (strcmp(pGblMod->szName, pszModuleName))
4675 continue;
4676 if (strcmp(pGblMod->szVersion, pszVersion))
4677 continue;
4678
4679 uint32_t i;
4680 for (i = 0; i < cRegions; i++)
4681 {
4682 uint32_t off = paRegions[i].GCRegionAddr & GUEST_PAGE_OFFSET_MASK;
4683 if (pGblMod->aRegions[i].off != off)
4684 break;
4685
4686 uint32_t cb = RT_ALIGN_32(paRegions[i].cbRegion + off, GUEST_PAGE_SIZE);
4687 if (pGblMod->aRegions[i].cb != cb)
4688 break;
4689 }
4690
4691 if (i == cRegions)
4692 return pGblMod;
4693 }
4694
4695 return NULL;
4696}
4697
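/*
 * Worked example of the per-region rounding used in the match above
 * (illustrative numbers, 4 KiB guest pages assumed):
 * @code
 *      // paRegions[i].GCRegionAddr = 0x7ff612345123, paRegions[i].cbRegion = 0x2f00
 *      // off = GCRegionAddr & GUEST_PAGE_OFFSET_MASK        = 0x123
 *      // cb  = RT_ALIGN_32(cbRegion + off, GUEST_PAGE_SIZE) = RT_ALIGN_32(0x3023, 0x1000) = 0x4000
 *      // A global module matches only if every region agrees on both off and cb.
 * @endcode
 */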
4698
4699/**
4700 * Creates a new global module.
4701 *
4702 * @returns VBox status code.
4703 * @param pGMM The GMM instance data.
4704 * @param uHash The hash as calculated by gmmR0ShModCalcHash.
4705 * @param cbModule The module size.
4706 * @param enmGuestOS The guest OS type.
4707 * @param cRegions The number of regions.
4708 * @param pszModuleName The module name.
4709 * @param pszVersion The module version.
4710 * @param paRegions The region descriptions.
4711 * @param ppGblMod Where to return the new module on success.
4712 */
4713static int gmmR0ShModNewGlobal(PGMM pGMM, uint32_t uHash, uint32_t cbModule, VBOXOSFAMILY enmGuestOS,
4714 uint32_t cRegions, const char *pszModuleName, const char *pszVersion,
4715 struct VMMDEVSHAREDREGIONDESC const *paRegions, PGMMSHAREDMODULE *ppGblMod)
4716{
4717 Log(("gmmR0ShModNewGlobal: %s %s size %#x os %u rgn %u\n", pszModuleName, pszVersion, cbModule, enmGuestOS, cRegions));
4718 if (pGMM->cShareableModules >= GMM_MAX_SHARED_GLOBAL_MODULES)
4719 {
4720 Log(("gmmR0ShModNewGlobal: Too many modules\n"));
4721 return VERR_GMM_TOO_MANY_GLOBAL_MODULES;
4722 }
4723
4724 PGMMSHAREDMODULE pGblMod = (PGMMSHAREDMODULE)RTMemAllocZ(RT_UOFFSETOF_DYN(GMMSHAREDMODULE, aRegions[cRegions]));
4725 if (!pGblMod)
4726 {
4727 Log(("gmmR0ShModNewGlobal: No memory\n"));
4728 return VERR_NO_MEMORY;
4729 }
4730
4731 pGblMod->Core.Key = uHash;
4732 pGblMod->cbModule = cbModule;
4733 pGblMod->cRegions = cRegions;
4734 pGblMod->cUsers = 1;
4735 pGblMod->enmGuestOS = enmGuestOS;
4736 strcpy(pGblMod->szName, pszModuleName);
4737 strcpy(pGblMod->szVersion, pszVersion);
4738
4739 for (uint32_t i = 0; i < cRegions; i++)
4740 {
4741 Log(("gmmR0ShModNewGlobal: rgn[%u]=%RGvLB%#x\n", i, paRegions[i].GCRegionAddr, paRegions[i].cbRegion));
4742 pGblMod->aRegions[i].off = paRegions[i].GCRegionAddr & GUEST_PAGE_OFFSET_MASK;
4743 pGblMod->aRegions[i].cb = paRegions[i].cbRegion + pGblMod->aRegions[i].off;
4744 pGblMod->aRegions[i].cb = RT_ALIGN_32(pGblMod->aRegions[i].cb, GUEST_PAGE_SIZE);
4745 pGblMod->aRegions[i].paidPages = NULL; /* allocated when needed. */
4746 }
4747
4748 bool fInsert = RTAvllU32Insert(&pGMM->pGlobalSharedModuleTree, &pGblMod->Core);
4749 Assert(fInsert); NOREF(fInsert);
4750 pGMM->cShareableModules++;
4751
4752 *ppGblMod = pGblMod;
4753 return VINF_SUCCESS;
4754}
4755
4756
4757/**
4758 * Deletes a global module which is no longer referenced by anyone.
4759 *
4760 * @param pGMM The GMM instance data.
4761 * @param pGblMod The module to delete.
4762 */
4763static void gmmR0ShModDeleteGlobal(PGMM pGMM, PGMMSHAREDMODULE pGblMod)
4764{
4765 Assert(pGblMod->cUsers == 0);
4766 Assert(pGMM->cShareableModules > 0 && pGMM->cShareableModules <= GMM_MAX_SHARED_GLOBAL_MODULES);
4767
4768 void *pvTest = RTAvllU32RemoveNode(&pGMM->pGlobalSharedModuleTree, &pGblMod->Core);
4769 Assert(pvTest == pGblMod); NOREF(pvTest);
4770 pGMM->cShareableModules--;
4771
4772 uint32_t i = pGblMod->cRegions;
4773 while (i-- > 0)
4774 {
4775 if (pGblMod->aRegions[i].paidPages)
4776 {
4777 /* We don't do anything to the pages, as they are handled by the
4778 copy-on-write mechanism in PGM. */
4779 RTMemFree(pGblMod->aRegions[i].paidPages);
4780 pGblMod->aRegions[i].paidPages = NULL;
4781 }
4782 }
4783 RTMemFree(pGblMod);
4784}
4785
4786
4787static int gmmR0ShModNewPerVM(PGVM pGVM, RTGCPTR GCBaseAddr, uint32_t cRegions, const VMMDEVSHAREDREGIONDESC *paRegions,
4788 PGMMSHAREDMODULEPERVM *ppRecVM)
4789{
4790 if (pGVM->gmm.s.Stats.cShareableModules >= GMM_MAX_SHARED_PER_VM_MODULES)
4791 return VERR_GMM_TOO_MANY_PER_VM_MODULES;
4792
4793 PGMMSHAREDMODULEPERVM pRecVM;
4794 pRecVM = (PGMMSHAREDMODULEPERVM)RTMemAllocZ(RT_UOFFSETOF_DYN(GMMSHAREDMODULEPERVM, aRegionsGCPtrs[cRegions]));
4795 if (!pRecVM)
4796 return VERR_NO_MEMORY;
4797
4798 pRecVM->Core.Key = GCBaseAddr;
4799 for (uint32_t i = 0; i < cRegions; i++)
4800 pRecVM->aRegionsGCPtrs[i] = paRegions[i].GCRegionAddr;
4801
4802 bool fInsert = RTAvlGCPtrInsert(&pGVM->gmm.s.pSharedModuleTree, &pRecVM->Core);
4803 Assert(fInsert); NOREF(fInsert);
4804 pGVM->gmm.s.Stats.cShareableModules++;
4805
4806 *ppRecVM = pRecVM;
4807 return VINF_SUCCESS;
4808}
4809
4810
4811static void gmmR0ShModDeletePerVM(PGMM pGMM, PGVM pGVM, PGMMSHAREDMODULEPERVM pRecVM, bool fRemove)
4812{
4813 /*
4814 * Free the per-VM module.
4815 */
4816 PGMMSHAREDMODULE pGblMod = pRecVM->pGlobalModule;
4817 pRecVM->pGlobalModule = NULL;
4818
4819 if (fRemove)
4820 {
4821 void *pvTest = RTAvlGCPtrRemove(&pGVM->gmm.s.pSharedModuleTree, pRecVM->Core.Key);
4822 Assert(pvTest == &pRecVM->Core); NOREF(pvTest);
4823 }
4824
4825 RTMemFree(pRecVM);
4826
4827 /*
4828 * Release the global module.
4829 * (In the registration bailout case, it might not be.)
4830 */
4831 if (pGblMod)
4832 {
4833 Assert(pGblMod->cUsers > 0);
4834 pGblMod->cUsers--;
4835 if (pGblMod->cUsers == 0)
4836 gmmR0ShModDeleteGlobal(pGMM, pGblMod);
4837 }
4838}
4839
4840#endif /* VBOX_WITH_PAGE_SHARING */
4841
4842/**
4843 * Registers a new shared module for the VM.
4844 *
4845 * @returns VBox status code.
4846 * @param pGVM The global (ring-0) VM structure.
4847 * @param idCpu The VCPU id.
4848 * @param enmGuestOS The guest OS type.
4849 * @param pszModuleName The module name.
4850 * @param pszVersion The module version.
4851 * @param GCPtrModBase The module base address.
4852 * @param cbModule The module size.
4853 * @param cRegions The number of shared region descriptors.
4854 * @param paRegions Pointer to an array of shared region(s).
4855 * @thread EMT(idCpu)
4856 */
4857GMMR0DECL(int) GMMR0RegisterSharedModule(PGVM pGVM, VMCPUID idCpu, VBOXOSFAMILY enmGuestOS, char *pszModuleName,
4858 char *pszVersion, RTGCPTR GCPtrModBase, uint32_t cbModule,
4859 uint32_t cRegions, struct VMMDEVSHAREDREGIONDESC const *paRegions)
4860{
4861#ifdef VBOX_WITH_PAGE_SHARING
4862 /*
4863 * Validate input and get the basics.
4864 *
4865 * Note! Turns out the module size does not necessarily match the size of the
4866 * regions. (iTunes on XP)
4867 */
4868 PGMM pGMM;
4869 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
4870 int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
4871 if (RT_FAILURE(rc))
4872 return rc;
4873
4874 if (RT_UNLIKELY(cRegions > VMMDEVSHAREDREGIONDESC_MAX))
4875 return VERR_GMM_TOO_MANY_REGIONS;
4876
4877 if (RT_UNLIKELY(cbModule == 0 || cbModule > _1G))
4878 return VERR_GMM_BAD_SHARED_MODULE_SIZE;
4879
4880 uint32_t cbTotal = 0;
4881 for (uint32_t i = 0; i < cRegions; i++)
4882 {
4883 if (RT_UNLIKELY(paRegions[i].cbRegion == 0 || paRegions[i].cbRegion > _1G))
4884 return VERR_GMM_SHARED_MODULE_BAD_REGIONS_SIZE;
4885
4886 cbTotal += paRegions[i].cbRegion;
4887 if (RT_UNLIKELY(cbTotal > _1G))
4888 return VERR_GMM_SHARED_MODULE_BAD_REGIONS_SIZE;
4889 }
4890
4891 AssertPtrReturn(pszModuleName, VERR_INVALID_POINTER);
4892 if (RT_UNLIKELY(!memchr(pszModuleName, '\0', GMM_SHARED_MODULE_MAX_NAME_STRING)))
4893 return VERR_GMM_MODULE_NAME_TOO_LONG;
4894
4895 AssertPtrReturn(pszVersion, VERR_INVALID_POINTER);
4896 if (RT_UNLIKELY(!memchr(pszVersion, '\0', GMM_SHARED_MODULE_MAX_VERSION_STRING)))
4897 return VERR_GMM_MODULE_NAME_TOO_LONG;
4898
4899 uint32_t const uHash = gmmR0ShModCalcHash(pszModuleName, pszVersion);
4900 Log(("GMMR0RegisterSharedModule %s %s base %RGv size %x hash %x\n", pszModuleName, pszVersion, GCPtrModBase, cbModule, uHash));
4901
4902 /*
4903 * Take the semaphore and do some more validations.
4904 */
4905 gmmR0MutexAcquire(pGMM);
4906 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4907 {
4908 /*
4909 * Check if this module is already locally registered and register
4910 * it if it isn't. The base address is a unique module identifier
4911 * locally.
4912 */
4913 PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCPtrModBase);
4914 bool fNewModule = pRecVM == NULL;
4915 if (fNewModule)
4916 {
4917 rc = gmmR0ShModNewPerVM(pGVM, GCPtrModBase, cRegions, paRegions, &pRecVM);
4918 if (RT_SUCCESS(rc))
4919 {
4920 /*
4921 * Find a matching global module, register a new one if needed.
4922 */
4923 PGMMSHAREDMODULE pGblMod = gmmR0ShModFindGlobal(pGMM, uHash, cbModule, enmGuestOS, cRegions,
4924 pszModuleName, pszVersion, paRegions);
4925 if (!pGblMod)
4926 {
4927 Assert(fNewModule);
4928 rc = gmmR0ShModNewGlobal(pGMM, uHash, cbModule, enmGuestOS, cRegions,
4929 pszModuleName, pszVersion, paRegions, &pGblMod);
4930 if (RT_SUCCESS(rc))
4931 {
4932 pRecVM->pGlobalModule = pGblMod; /* (One reference returned by gmmR0ShModNewGlobal.) */
4933 Log(("GMMR0RegisterSharedModule: new module %s %s\n", pszModuleName, pszVersion));
4934 }
4935 else
4936 gmmR0ShModDeletePerVM(pGMM, pGVM, pRecVM, true /*fRemove*/);
4937 }
4938 else
4939 {
4940 Assert(pGblMod->cUsers > 0 && pGblMod->cUsers < UINT32_MAX / 2);
4941 pGblMod->cUsers++;
4942 pRecVM->pGlobalModule = pGblMod;
4943
4944 Log(("GMMR0RegisterSharedModule: new per vm module %s %s, gbl users %d\n", pszModuleName, pszVersion, pGblMod->cUsers));
4945 }
4946 }
4947 }
4948 else
4949 {
4950 /*
4951 * Attempt to re-register an existing module.
4952 */
4953 PGMMSHAREDMODULE pGblMod = gmmR0ShModFindGlobal(pGMM, uHash, cbModule, enmGuestOS, cRegions,
4954 pszModuleName, pszVersion, paRegions);
4955 if (pRecVM->pGlobalModule == pGblMod)
4956 {
4957 Log(("GMMR0RegisterSharedModule: already registered %s %s, gbl users %d\n", pszModuleName, pszVersion, pGblMod->cUsers));
4958 rc = VINF_GMM_SHARED_MODULE_ALREADY_REGISTERED;
4959 }
4960 else
4961 {
4962 /** @todo may have to unregister+register when this happens in case it's caused
4963 * by VBoxService crashing and being restarted... */
4964 Log(("GMMR0RegisterSharedModule: Address clash!\n"
4965 " incoming at %RGvLB%#x %s %s rgns %u\n"
4966 " existing at %RGvLB%#x %s %s rgns %u\n",
4967 GCPtrModBase, cbModule, pszModuleName, pszVersion, cRegions,
4968 pRecVM->Core.Key, pRecVM->pGlobalModule->cbModule, pRecVM->pGlobalModule->szName,
4969 pRecVM->pGlobalModule->szVersion, pRecVM->pGlobalModule->cRegions));
4970 rc = VERR_GMM_SHARED_MODULE_ADDRESS_CLASH;
4971 }
4972 }
4973 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4974 }
4975 else
4976 rc = VERR_GMM_IS_NOT_SANE;
4977
4978 gmmR0MutexRelease(pGMM);
4979 return rc;
4980#else
4981
4982 NOREF(pGVM); NOREF(idCpu); NOREF(enmGuestOS); NOREF(pszModuleName); NOREF(pszVersion);
4983 NOREF(GCPtrModBase); NOREF(cbModule); NOREF(cRegions); NOREF(paRegions);
4984 return VERR_NOT_IMPLEMENTED;
4985#endif
4986}
4987
4988
4989/**
4990 * VMMR0 request wrapper for GMMR0RegisterSharedModule.
4991 *
4992 * @returns see GMMR0RegisterSharedModule.
4993 * @param pGVM The global (ring-0) VM structure.
4994 * @param idCpu The VCPU id.
4995 * @param pReq Pointer to the request packet.
4996 */
4997GMMR0DECL(int) GMMR0RegisterSharedModuleReq(PGVM pGVM, VMCPUID idCpu, PGMMREGISTERSHAREDMODULEREQ pReq)
4998{
4999 /*
5000 * Validate input and pass it on.
5001 */
5002 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
5003 AssertMsgReturn( pReq->Hdr.cbReq >= sizeof(*pReq)
5004 && pReq->Hdr.cbReq == RT_UOFFSETOF_DYN(GMMREGISTERSHAREDMODULEREQ, aRegions[pReq->cRegions]),
5005 ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
5006
5007 /* Pass back return code in the request packet to preserve informational codes. (VMMR3CallR0 chokes on them) */
5008 pReq->rc = GMMR0RegisterSharedModule(pGVM, idCpu, pReq->enmGuestOS, pReq->szName, pReq->szVersion,
5009 pReq->GCBaseAddr, pReq->cbModule, pReq->cRegions, pReq->aRegions);
5010 return VINF_SUCCESS;
5011}
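
/*
 * Illustration (hypothetical caller, not part of the build): how a request for
 * GMMR0RegisterSharedModuleReq might be sized and filled.  Only fields that the
 * wrapper above actually touches are used (Hdr.cbReq, enmGuestOS, GCBaseAddr,
 * cbModule, cRegions, szName, szVersion, aRegions); szName/szVersion are assumed
 * to be fixed-size buffers, and any further request header initialization that
 * the transport requires is omitted.
 *
 * @code
 *  uint32_t const cbReq = RT_UOFFSETOF_DYN(GMMREGISTERSHAREDMODULEREQ, aRegions[cRegions]);
 *  PGMMREGISTERSHAREDMODULEREQ pReq = (PGMMREGISTERSHAREDMODULEREQ)RTMemAllocZ(cbReq);
 *  if (pReq)
 *  {
 *      pReq->Hdr.cbReq  = cbReq;           // must equal RT_UOFFSETOF_DYN(..., aRegions[cRegions])
 *      pReq->enmGuestOS = enmGuestOS;
 *      pReq->GCBaseAddr = GCPtrModBase;
 *      pReq->cbModule   = cbModule;
 *      pReq->cRegions   = cRegions;
 *      RTStrCopy(pReq->szName,    sizeof(pReq->szName),    pszModuleName);
 *      RTStrCopy(pReq->szVersion, sizeof(pReq->szVersion), pszVersion);
 *      // ... copy the cRegions region descriptors into pReq->aRegions[] and submit the request ...
 *  }
 * @endcode
 */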
5012
5013
5014/**
5015 * Unregisters a shared module for the VM.
5016 *
5017 * @returns VBox status code.
5018 * @param pGVM The global (ring-0) VM structure.
5019 * @param idCpu The VCPU id.
5020 * @param pszModuleName The module name.
5021 * @param pszVersion The module version.
5022 * @param GCPtrModBase The module base address.
5023 * @param cbModule The module size.
5024 */
5025GMMR0DECL(int) GMMR0UnregisterSharedModule(PGVM pGVM, VMCPUID idCpu, char *pszModuleName, char *pszVersion,
5026 RTGCPTR GCPtrModBase, uint32_t cbModule)
5027{
5028#ifdef VBOX_WITH_PAGE_SHARING
5029 /*
5030 * Validate input and get the basics.
5031 */
5032 PGMM pGMM;
5033 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
5034 int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
5035 if (RT_FAILURE(rc))
5036 return rc;
5037
5038 AssertPtrReturn(pszModuleName, VERR_INVALID_POINTER);
5039 AssertPtrReturn(pszVersion, VERR_INVALID_POINTER);
5040 if (RT_UNLIKELY(!memchr(pszModuleName, '\0', GMM_SHARED_MODULE_MAX_NAME_STRING)))
5041 return VERR_GMM_MODULE_NAME_TOO_LONG;
5042 if (RT_UNLIKELY(!memchr(pszVersion, '\0', GMM_SHARED_MODULE_MAX_VERSION_STRING)))
5043 return VERR_GMM_MODULE_NAME_TOO_LONG;
5044
5045 Log(("GMMR0UnregisterSharedModule %s %s base=%RGv size %x\n", pszModuleName, pszVersion, GCPtrModBase, cbModule));
5046
5047 /*
5048 * Take the semaphore and do some more validations.
5049 */
5050 gmmR0MutexAcquire(pGMM);
5051 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
5052 {
5053 /*
5054 * Locate and remove the specified module.
5055 */
5056 PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCPtrModBase);
5057 if (pRecVM)
5058 {
5059 /** @todo Do we need to do more validations here, like that the
5060 * name + version + cbModule matches? */
5061 NOREF(cbModule);
5062 Assert(pRecVM->pGlobalModule);
5063 gmmR0ShModDeletePerVM(pGMM, pGVM, pRecVM, true /*fRemove*/);
5064 }
5065 else
5066 rc = VERR_GMM_SHARED_MODULE_NOT_FOUND;
5067
5068 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
5069 }
5070 else
5071 rc = VERR_GMM_IS_NOT_SANE;
5072
5073 gmmR0MutexRelease(pGMM);
5074 return rc;
5075#else
5076
5077 NOREF(pGVM); NOREF(idCpu); NOREF(pszModuleName); NOREF(pszVersion); NOREF(GCPtrModBase); NOREF(cbModule);
5078 return VERR_NOT_IMPLEMENTED;
5079#endif
5080}
5081
5082
5083/**
5084 * VMMR0 request wrapper for GMMR0UnregisterSharedModule.
5085 *
5086 * @returns see GMMR0UnregisterSharedModule.
5087 * @param pGVM The global (ring-0) VM structure.
5088 * @param idCpu The VCPU id.
5089 * @param pReq Pointer to the request packet.
5090 */
5091GMMR0DECL(int) GMMR0UnregisterSharedModuleReq(PGVM pGVM, VMCPUID idCpu, PGMMUNREGISTERSHAREDMODULEREQ pReq)
5092{
5093 /*
5094 * Validate input and pass it on.
5095 */
5096 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
5097 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
5098
5099 return GMMR0UnregisterSharedModule(pGVM, idCpu, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule);
5100}
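
/*
 * Illustration (hypothetical caller, not part of the build): unlike the register
 * request above, GMMUNREGISTERSHAREDMODULEREQ is fixed size, so the header size
 * check is simply sizeof(*pReq).  Only fields used by the wrapper above appear
 * here; szName/szVersion are assumed to be fixed-size buffers.
 *
 * @code
 *  GMMUNREGISTERSHAREDMODULEREQ Req;
 *  RT_ZERO(Req);
 *  Req.Hdr.cbReq  = sizeof(Req);
 *  Req.GCBaseAddr = GCPtrModBase;
 *  Req.cbModule   = cbModule;
 *  RTStrCopy(Req.szName,    sizeof(Req.szName),    pszModuleName);
 *  RTStrCopy(Req.szVersion, sizeof(Req.szVersion), pszVersion);
 *  // ... submit the request ...
 * @endcode
 */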
5101
5102#ifdef VBOX_WITH_PAGE_SHARING
5103
5104/**
5105 * Increases the use count of a shared page; the page is known to exist and be valid.
5106 *
5107 * @param pGMM Pointer to the GMM instance.
5108 * @param pGVM Pointer to the GVM instance.
5109 * @param pPage The page structure.
5110 */
5111DECLINLINE(void) gmmR0UseSharedPage(PGMM pGMM, PGVM pGVM, PGMMPAGE pPage)
5112{
5113 Assert(pGMM->cSharedPages > 0);
5114 Assert(pGMM->cAllocatedPages > 0);
5115
5116 pGMM->cDuplicatePages++;
5117
5118 pPage->Shared.cRefs++;
5119 pGVM->gmm.s.Stats.cSharedPages++;
5120 pGVM->gmm.s.Stats.Allocated.cBasePages++;
5121}
5122
5123
5124/**
5125 * Converts a private page to a shared page; the page is known to exist and be valid.
5126 *
5127 * @param pGMM Pointer to the GMM instance.
5128 * @param pGVM Pointer to the GVM instance.
5129 * @param   HCPhys      The host physical address of the page.
5130 * @param   idPage      The page ID.
5131 * @param   pPage       The page structure.
5132 * @param   pPageDesc   The shared page descriptor.
5133 */
5134DECLINLINE(void) gmmR0ConvertToSharedPage(PGMM pGMM, PGVM pGVM, RTHCPHYS HCPhys, uint32_t idPage, PGMMPAGE pPage,
5135 PGMMSHAREDPAGEDESC pPageDesc)
5136{
5137 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
5138 Assert(pChunk);
5139 Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
5140 Assert(GMM_PAGE_IS_PRIVATE(pPage));
5141
5142 pChunk->cPrivate--;
5143 pChunk->cShared++;
5144
5145 pGMM->cSharedPages++;
5146
5147 pGVM->gmm.s.Stats.cSharedPages++;
5148 pGVM->gmm.s.Stats.cPrivatePages--;
5149
5150 /* Modify the page structure. */
5151 pPage->Shared.pfn = (uint32_t)(uint64_t)(HCPhys >> GUEST_PAGE_SHIFT);
5152 pPage->Shared.cRefs = 1;
5153#ifdef VBOX_STRICT
5154 pPageDesc->u32StrictChecksum = gmmR0StrictPageChecksum(pGMM, pGVM, idPage);
5155 pPage->Shared.u14Checksum = pPageDesc->u32StrictChecksum;
5156#else
5157 NOREF(pPageDesc);
5158 pPage->Shared.u14Checksum = 0;
5159#endif
5160 pPage->Shared.u2State = GMM_PAGE_STATE_SHARED;
5161}
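
/*
 * Illustration (not part of the build): the page-ID and PFN arithmetic used by
 * gmmR0ConvertToSharedPage and by GMMR0SharedModuleCheckPage further down,
 * spelled out in one hypothetical helper.  The constants are the ones already
 * used in this file; only the helper itself is made up.
 *
 * @code
 *  static void gmmR0ExamplePageIdMath(uint32_t idPage, RTHCPHYS HCPhys)
 *  {
 *      uint32_t const idChunk = idPage >> GMM_CHUNKID_SHIFT;            // chunk the page belongs to
 *      uint32_t const iPage   = idPage &  GMM_PAGEID_IDX_MASK;          // page index within that chunk
 *      uint32_t const uPfn    = (uint32_t)(HCPhys >> GUEST_PAGE_SHIFT); // what Shared.pfn stores
 *      RTHCPHYS const HCPhys2 = (RTHCPHYS)uPfn << GUEST_PAGE_SHIFT;     // widened back when handed out
 *      NOREF(idChunk); NOREF(iPage); NOREF(HCPhys2);
 *  }
 * @endcode
 */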
5162
5163
5164static int gmmR0SharedModuleCheckPageFirstTime(PGMM pGMM, PGVM pGVM, PGMMSHAREDMODULE pModule,
5165 unsigned idxRegion, unsigned idxPage,
5166 PGMMSHAREDPAGEDESC pPageDesc, PGMMSHAREDREGIONDESC pGlobalRegion)
5167{
5168 NOREF(pModule);
5169
5170 /* Easy case: just change the internal page type. */
5171 PGMMPAGE pPage = gmmR0GetPage(pGMM, pPageDesc->idPage);
5172 AssertMsgReturn(pPage, ("idPage=%#x (GCPhys=%RGp HCPhys=%RHp idxRegion=%#x idxPage=%#x) #1\n",
5173 pPageDesc->idPage, pPageDesc->GCPhys, pPageDesc->HCPhys, idxRegion, idxPage),
5174 VERR_PGM_PHYS_INVALID_PAGE_ID);
5175 NOREF(idxRegion);
5176
5177    AssertMsg(pPageDesc->GCPhys == (pPage->Private.pfn << GUEST_PAGE_SHIFT), ("desc %RGp gmm %RGp\n", pPageDesc->GCPhys, (pPage->Private.pfn << GUEST_PAGE_SHIFT)));
5178
5179 gmmR0ConvertToSharedPage(pGMM, pGVM, pPageDesc->HCPhys, pPageDesc->idPage, pPage, pPageDesc);
5180
5181 /* Keep track of these references. */
5182 pGlobalRegion->paidPages[idxPage] = pPageDesc->idPage;
5183
5184 return VINF_SUCCESS;
5185}
5186
5187/**
5188 * Checks the specified shared module range for changes.
5189 *
5190 * Performs the following tasks:
5191 * - If a shared page is new, then it changes the GMM page type to shared and
5192 * returns it in the pPageDesc descriptor.
5193 * - If a shared page already exists, then it checks if the VM page is
5194 * identical and if so frees the VM page and returns the shared page in
5195 * pPageDesc descriptor.
5196 *
5197 * @remarks ASSUMES the caller has acquired the GMM semaphore!!
5198 *
5199 * @returns VBox status code.
5200 * @param pGVM Pointer to the GVM instance data.
5201 * @param pModule Module description
5202 * @param idxRegion Region index
5203 * @param idxPage Page index
5204 * @param pPageDesc Page descriptor
5205 */
5206GMMR0DECL(int) GMMR0SharedModuleCheckPage(PGVM pGVM, PGMMSHAREDMODULE pModule, uint32_t idxRegion, uint32_t idxPage,
5207 PGMMSHAREDPAGEDESC pPageDesc)
5208{
5209 int rc;
5210 PGMM pGMM;
5211 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
5212 pPageDesc->u32StrictChecksum = 0;
5213
5214 AssertMsgReturn(idxRegion < pModule->cRegions,
5215 ("idxRegion=%#x cRegions=%#x %s %s\n", idxRegion, pModule->cRegions, pModule->szName, pModule->szVersion),
5216 VERR_INVALID_PARAMETER);
5217
5218 uint32_t const cPages = pModule->aRegions[idxRegion].cb >> GUEST_PAGE_SHIFT;
5219 AssertMsgReturn(idxPage < cPages,
5220                    ("idxPage=%#x cPages=%#x %s %s\n", idxPage, cPages, pModule->szName, pModule->szVersion),
5221 VERR_INVALID_PARAMETER);
5222
5223    LogFlow(("GMMR0SharedModuleCheckPage %s base %RGv region %d idxPage %d\n", pModule->szName, pModule->Core.Key, idxRegion, idxPage));
5224
5225 /*
5226 * First time; create a page descriptor array.
5227 */
5228 PGMMSHAREDREGIONDESC pGlobalRegion = &pModule->aRegions[idxRegion];
5229 if (!pGlobalRegion->paidPages)
5230 {
5231 Log(("Allocate page descriptor array for %d pages\n", cPages));
5232 pGlobalRegion->paidPages = (uint32_t *)RTMemAlloc(cPages * sizeof(pGlobalRegion->paidPages[0]));
5233 AssertReturn(pGlobalRegion->paidPages, VERR_NO_MEMORY);
5234
5235 /* Invalidate all descriptors. */
5236 uint32_t i = cPages;
5237 while (i-- > 0)
5238 pGlobalRegion->paidPages[i] = NIL_GMM_PAGEID;
5239 }
5240
5241 /*
5242 * We've seen this shared page for the first time?
5243 */
5244 if (pGlobalRegion->paidPages[idxPage] == NIL_GMM_PAGEID)
5245 {
5246 Log(("New shared page guest %RGp host %RHp\n", pPageDesc->GCPhys, pPageDesc->HCPhys));
5247 return gmmR0SharedModuleCheckPageFirstTime(pGMM, pGVM, pModule, idxRegion, idxPage, pPageDesc, pGlobalRegion);
5248 }
5249
5250 /*
5251 * We've seen it before...
5252 */
5253 Log(("Replace existing page guest %RGp host %RHp id %#x -> id %#x\n",
5254 pPageDesc->GCPhys, pPageDesc->HCPhys, pPageDesc->idPage, pGlobalRegion->paidPages[idxPage]));
5255 Assert(pPageDesc->idPage != pGlobalRegion->paidPages[idxPage]);
5256
5257 /*
5258 * Get the shared page source.
5259 */
5260 PGMMPAGE pPage = gmmR0GetPage(pGMM, pGlobalRegion->paidPages[idxPage]);
5261    AssertMsgReturn(pPage, ("idPage=%#x (idxRegion=%#x idxPage=%#x) #2\n", pGlobalRegion->paidPages[idxPage], idxRegion, idxPage),
5262 VERR_PGM_PHYS_INVALID_PAGE_ID);
5263
5264 if (pPage->Common.u2State != GMM_PAGE_STATE_SHARED)
5265 {
5266 /*
5267 * Page was freed at some point; invalidate this entry.
5268 */
5269 /** @todo this isn't really bullet proof. */
5270 Log(("Old shared page was freed -> create a new one\n"));
5271 pGlobalRegion->paidPages[idxPage] = NIL_GMM_PAGEID;
5272 return gmmR0SharedModuleCheckPageFirstTime(pGMM, pGVM, pModule, idxRegion, idxPage, pPageDesc, pGlobalRegion);
5273 }
5274
5275    Log(("Replace existing page: host %RHp -> %RHp\n", pPageDesc->HCPhys, ((uint64_t)pPage->Shared.pfn) << GUEST_PAGE_SHIFT));
5276
5277 /*
5278 * Calculate the virtual address of the local page.
5279 */
5280 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, pPageDesc->idPage >> GMM_CHUNKID_SHIFT);
5281 AssertMsgReturn(pChunk, ("idPage=%#x (idxRegion=%#x idxPage=%#x) #4\n", pPageDesc->idPage, idxRegion, idxPage),
5282 VERR_PGM_PHYS_INVALID_PAGE_ID);
5283
5284 uint8_t *pbChunk;
5285 AssertMsgReturn(gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk),
5286 ("idPage=%#x (idxRegion=%#x idxPage=%#x) #3\n", pPageDesc->idPage, idxRegion, idxPage),
5287 VERR_PGM_PHYS_INVALID_PAGE_ID);
5288 uint8_t *pbLocalPage = pbChunk + ((pPageDesc->idPage & GMM_PAGEID_IDX_MASK) << GUEST_PAGE_SHIFT);
5289
5290 /*
5291 * Calculate the virtual address of the shared page.
5292 */
5293 pChunk = gmmR0GetChunk(pGMM, pGlobalRegion->paidPages[idxPage] >> GMM_CHUNKID_SHIFT);
5294 Assert(pChunk); /* can't fail as gmmR0GetPage succeeded. */
5295
5296 /*
5297 * Get the virtual address of the physical page; map the chunk into the VM
5298 * process if not already done.
5299 */
5300 if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
5301 {
5302 Log(("Map chunk into process!\n"));
5303 rc = gmmR0MapChunk(pGMM, pGVM, pChunk, false /*fRelaxedSem*/, (PRTR3PTR)&pbChunk);
5304 AssertRCReturn(rc, rc);
5305 }
5306 uint8_t *pbSharedPage = pbChunk + ((pGlobalRegion->paidPages[idxPage] & GMM_PAGEID_IDX_MASK) << GUEST_PAGE_SHIFT);
5307
5308#ifdef VBOX_STRICT
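    /* Note: only the low 14 bits of the CRC-32 fit into u14Checksum, and a zero
       value is treated as "no checksum recorded" by the assertion below. */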
5309 pPageDesc->u32StrictChecksum = RTCrc32(pbSharedPage, GUEST_PAGE_SIZE);
5310 uint32_t uChecksum = pPageDesc->u32StrictChecksum & UINT32_C(0x00003fff);
5311 AssertMsg(!uChecksum || uChecksum == pPage->Shared.u14Checksum || !pPage->Shared.u14Checksum,
5312 ("%#x vs %#x - idPage=%#x - %s %s\n", uChecksum, pPage->Shared.u14Checksum,
5313 pGlobalRegion->paidPages[idxPage], pModule->szName, pModule->szVersion));
5314#endif
5315
5316 if (memcmp(pbSharedPage, pbLocalPage, GUEST_PAGE_SIZE))
5317 {
5318 Log(("Unexpected differences found between local and shared page; skip\n"));
5319 /* Signal to the caller that this one hasn't changed. */
5320 pPageDesc->idPage = NIL_GMM_PAGEID;
5321 return VINF_SUCCESS;
5322 }
5323
5324 /*
5325 * Free the old local page.
5326 */
5327 GMMFREEPAGEDESC PageDesc;
5328 PageDesc.idPage = pPageDesc->idPage;
5329 rc = gmmR0FreePages(pGMM, pGVM, 1, &PageDesc, GMMACCOUNT_BASE);
5330 AssertRCReturn(rc, rc);
5331
5332 gmmR0UseSharedPage(pGMM, pGVM, pPage);
5333
5334 /*
5335 * Pass along the new physical address & page id.
5336 */
5337 pPageDesc->HCPhys = ((uint64_t)pPage->Shared.pfn) << GUEST_PAGE_SHIFT;
5338 pPageDesc->idPage = pGlobalRegion->paidPages[idxPage];
5339
5340 return VINF_SUCCESS;
5341}
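
/*
 * Illustration (hypothetical caller, not part of the build): how the output
 * descriptor of GMMR0SharedModuleCheckPage is typically interpreted.  This only
 * relies on behaviour visible in the function above: on success the descriptor
 * either carries NIL_GMM_PAGEID (the contents differed, nothing was changed) or
 * the ID and host address of the shared page now backing the guest page.
 *
 * @code
 *  GMMSHAREDPAGEDESC PageDesc;   // assuming GMMSHAREDPAGEDESC is the struct behind PGMMSHAREDPAGEDESC
 *  // ... fill in PageDesc.idPage, PageDesc.GCPhys and PageDesc.HCPhys for the guest page ...
 *  int rc = GMMR0SharedModuleCheckPage(pGVM, pModule, idxRegion, idxPage, &PageDesc);
 *  if (RT_SUCCESS(rc))
 *  {
 *      if (PageDesc.idPage != NIL_GMM_PAGEID)
 *      {
 *          // The private page was freed; remap the guest page to PageDesc.HCPhys and
 *          // record PageDesc.idPage as its new (shared) page ID.
 *      }
 *      // else: the page contents differed and the original private page is untouched.
 *  }
 * @endcode
 */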
5342
5343
5344/**
5345 * RTAvlGCPtrDestroy callback.
5346 *
5347 * @returns VINF_SUCCESS.
5348 * @param pNode The node to destroy.
5349 * @param pvArgs Pointer to an argument packet.
5350 */
5351static DECLCALLBACK(int) gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvArgs)
5352{
5353 gmmR0ShModDeletePerVM(((GMMR0SHMODPERVMDTORARGS *)pvArgs)->pGMM,
5354 ((GMMR0SHMODPERVMDTORARGS *)pvArgs)->pGVM,
5355 (PGMMSHAREDMODULEPERVM)pNode,
5356 false /*fRemove*/);
5357 return VINF_SUCCESS;
5358}
5359
5360
5361/**
5362 * Used by GMMR0CleanupVM to clean up shared modules.
5363 *
5364 * This is called without taking the GMM lock so that it can be yielded as
5365 * needed here.
5366 *
5367 * @param pGMM The GMM handle.
5368 * @param pGVM The global VM handle.
5369 */
5370static void gmmR0SharedModuleCleanup(PGMM pGMM, PGVM pGVM)
5371{
5372 gmmR0MutexAcquire(pGMM);
5373 GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
5374
5375 GMMR0SHMODPERVMDTORARGS Args;
5376 Args.pGVM = pGVM;
5377 Args.pGMM = pGMM;
5378 RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, &Args);
5379
5380 AssertMsg(pGVM->gmm.s.Stats.cShareableModules == 0, ("%d\n", pGVM->gmm.s.Stats.cShareableModules));
5381 pGVM->gmm.s.Stats.cShareableModules = 0;
5382
5383 gmmR0MutexRelease(pGMM);
5384}
5385
5386#endif /* VBOX_WITH_PAGE_SHARING */
5387
5388/**
5389 * Removes all shared modules for the specified VM
5390 *
5391 * @returns VBox status code.
5392 * @param pGVM The global (ring-0) VM structure.
5393 * @param idCpu The VCPU id.
5394 */
5395GMMR0DECL(int) GMMR0ResetSharedModules(PGVM pGVM, VMCPUID idCpu)
5396{
5397#ifdef VBOX_WITH_PAGE_SHARING
5398 /*
5399 * Validate input and get the basics.
5400 */
5401 PGMM pGMM;
5402 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
5403 int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
5404 if (RT_FAILURE(rc))
5405 return rc;
5406
5407 /*
5408 * Take the semaphore and do some more validations.
5409 */
5410 gmmR0MutexAcquire(pGMM);
5411 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
5412 {
5413 Log(("GMMR0ResetSharedModules\n"));
5414 GMMR0SHMODPERVMDTORARGS Args;
5415 Args.pGVM = pGVM;
5416 Args.pGMM = pGMM;
5417 RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, &Args);
5418 pGVM->gmm.s.Stats.cShareableModules = 0;
5419
5420 rc = VINF_SUCCESS;
5421 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
5422 }
5423 else
5424 rc = VERR_GMM_IS_NOT_SANE;
5425
5426 gmmR0MutexRelease(pGMM);
5427 return rc;
5428#else
5429 RT_NOREF(pGVM, idCpu);
5430 return VERR_NOT_IMPLEMENTED;
5431#endif
5432}
5433
5434#ifdef VBOX_WITH_PAGE_SHARING
5435
5436/**
5437 * Tree enumeration callback for checking a shared module.
5438 */
5439static DECLCALLBACK(int) gmmR0CheckSharedModule(PAVLGCPTRNODECORE pNode, void *pvUser)
5440{
5441 GMMCHECKSHAREDMODULEINFO *pArgs = (GMMCHECKSHAREDMODULEINFO*)pvUser;
5442 PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)pNode;
5443 PGMMSHAREDMODULE pGblMod = pRecVM->pGlobalModule;
5444
5445 Log(("gmmR0CheckSharedModule: check %s %s base=%RGv size=%x\n",
5446 pGblMod->szName, pGblMod->szVersion, pGblMod->Core.Key, pGblMod->cbModule));
5447
5448 int rc = PGMR0SharedModuleCheck(pArgs->pGVM, pArgs->pGVM, pArgs->idCpu, pGblMod, pRecVM->aRegionsGCPtrs);
5449 if (RT_FAILURE(rc))
5450 return rc;
5451 return VINF_SUCCESS;
5452}
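
/*
 * Illustration (hypothetical, not part of the build): the enumeration pattern
 * used by GMMR0CheckSharedModules below.  As used in this file, a non-zero
 * return from the callback passed to RTAvlGCPtrDoWithAll stops the walk and is
 * handed back to the caller, which is how gmmR0CheckSharedModule above aborts
 * on the first failing module.
 *
 * @code
 *  static DECLCALLBACK(int) exampleCountModule(PAVLGCPTRNODECORE pNode, void *pvUser)
 *  {
 *      NOREF(pNode);
 *      *(uint32_t *)pvUser += 1;
 *      return VINF_SUCCESS;   // zero/VINF_SUCCESS keeps the walk going
 *  }
 *
 *  static uint32_t exampleCountAllModules(PGVM pGVM)
 *  {
 *      uint32_t cModules = 0;
 *      RTAvlGCPtrDoWithAll(&pGVM->gmm.s.pSharedModuleTree, true, exampleCountModule, &cModules); // fFromLeft=true
 *      return cModules;
 *  }
 * @endcode
 */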
5453
5454#endif /* VBOX_WITH_PAGE_SHARING */
5455
5456/**
5457 * Check all shared modules for the specified VM.
5458 *
5459 * @returns VBox status code.
5460 * @param pGVM The global (ring-0) VM structure.
5461 * @param idCpu The calling EMT number.
5462 * @thread EMT(idCpu)
5463 */
5464GMMR0DECL(int) GMMR0CheckSharedModules(PGVM pGVM, VMCPUID idCpu)
5465{
5466#ifdef VBOX_WITH_PAGE_SHARING
5467 /*
5468 * Validate input and get the basics.
5469 */
5470 PGMM pGMM;
5471 GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
5472 int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
5473 if (RT_FAILURE(rc))
5474 return rc;
5475
5476# ifndef DEBUG_sandervl
5477 /*
5478 * Take the semaphore and do some more validations.
5479 */
5480 gmmR0MutexAcquire(pGMM);
5481# endif
5482 if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
5483 {
5484 /*
5485 * Walk the tree, checking each module.
5486 */
5487 Log(("GMMR0CheckSharedModules\n"));
5488
5489 GMMCHECKSHAREDMODULEINFO Args;
5490 Args.pGVM = pGVM;
5491 Args.idCpu = idCpu;
5492 rc = RTAvlGCPtrDoWithAll(&pGVM->gmm.s.pSharedModuleTree, true /* fFromLeft */, gmmR0CheckSharedModule, &Args);
5493
5494 Log(("GMMR0CheckSharedModules done (rc=%Rrc)!\n", rc));
5495 GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
5496 }
5497 else
5498 rc = VERR_GMM_IS_NOT_SANE;
5499
5500# ifndef DEBUG_sandervl
5501 gmmR0MutexRelease(pGMM);
5502# endif
5503 return rc;
5504#else
5505 RT_NOREF(pGVM, idCpu);
5506 return VERR_NOT_IMPLEMENTED;
5507#endif
5508}
5509
5510#ifdef VBOX_STRICT
5511
5512/**
5513 * Worker for GMMR0FindDuplicatePageReq.
5514 *
5515 * @returns true if duplicate, false if not.
5516 */
5517static bool gmmR0FindDupPageInChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, uint8_t const *pbSourcePage)
5518{
5519 bool fFoundDuplicate = false;
5520    /* Only bother with chunks that are not already mapped into this VM's process; not entirely correct. */
5521 uint8_t *pbChunk;
5522 if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
5523 {
5524 int rc = gmmR0MapChunk(pGMM, pGVM, pChunk, false /*fRelaxedSem*/, (PRTR3PTR)&pbChunk);
5525 if (RT_SUCCESS(rc))
5526 {
5527 /*
5528 * Look for duplicate pages
5529 */
5530 uintptr_t iPage = GMM_CHUNK_NUM_PAGES;
5531 while (iPage-- > 0)
5532 {
5533 if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
5534 {
5535 uint8_t *pbDestPage = pbChunk + (iPage << GUEST_PAGE_SHIFT);
5536 if (!memcmp(pbSourcePage, pbDestPage, GUEST_PAGE_SIZE))
5537 {
5538 fFoundDuplicate = true;
5539 break;
5540 }
5541 }
5542 }
5543 gmmR0UnmapChunk(pGMM, pGVM, pChunk, false /*fRelaxedSem*/);
5544 }
5545 }
5546 return fFoundDuplicate;
5547}
5548
5549
5550/**
5551 * Find a duplicate of the specified page in other active VMs
5552 *
5553 * @returns VBox status code.
5554 * @param pGVM The global (ring-0) VM structure.
5555 * @param pReq Pointer to the request packet.
5556 */
5557GMMR0DECL(int) GMMR0FindDuplicatePageReq(PGVM pGVM, PGMMFINDDUPLICATEPAGEREQ pReq)
5558{
5559 /*
5560 * Validate input and pass it on.
5561 */
5562