VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMBlkCache.cpp@ 96407

Last change on this file: r96407, checked in by vboxsync, 22 months ago

scm copyright and license note update

1/* $Id: PDMBlkCache.cpp 96407 2022-08-22 17:43:14Z vboxsync $ */
2/** @file
3 * PDM Block Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2022 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache
29 * This component implements an I/O cache based on the 2Q cache algorithm.
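 *
 * Roughly speaking the cache keeps three LRU lists (see PDMBLKCACHEGLOBAL):
 * LruRecentlyUsedIn holds entries which were accessed once recently (A1in in
 * 2Q terms), LruRecentlyUsedOut is the ghost list remembering the key ranges
 * of entries whose data buffers were already evicted (A1out), and
 * LruFrequentlyUsed holds entries which were accessed repeatedly (Am).  When
 * space is needed, pdmBlkCacheReclaim first evicts from A1in into the ghost
 * list and falls back to the frequently used list if that does not free
 * enough bytes.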
30 */
31
32
33/*********************************************************************************************************************************
34* Header Files *
35*********************************************************************************************************************************/
36#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE
37#include "PDMInternal.h"
38#include <iprt/asm.h>
39#include <iprt/mem.h>
40#include <iprt/path.h>
41#include <iprt/string.h>
42#include <iprt/trace.h>
43#include <VBox/log.h>
44#include <VBox/vmm/stam.h>
45#include <VBox/vmm/uvm.h>
46#include <VBox/vmm/vm.h>
47
48#include "PDMBlkCacheInternal.h"
49
50
51/*********************************************************************************************************************************
52* Defined Constants And Macros *
53*********************************************************************************************************************************/
54#ifdef VBOX_STRICT
55# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
56 do \
57 { \
58 AssertMsg(RTCritSectIsOwner(&Cache->CritSect), \
59 ("Thread does not own critical section\n"));\
60 } while (0)
61
62# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
63 do \
64 { \
65 AssertMsg(RTSemRWIsWriteOwner(pEpCache->SemRWEntries), \
66 ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
67 } while (0)
68
69# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
70 do \
71 { \
72 AssertMsg(RTSemRWIsReadOwner(pEpCache->SemRWEntries), \
73 ("Thread is not read owner of the per endpoint RW semaphore\n")); \
74 } while (0)
75
76#else
77# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while (0)
78# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while (0)
79# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while (0)
80#endif
81
82#define PDM_BLK_CACHE_SAVED_STATE_VERSION 1
83
84/* Define to enable some tracing in the block cache code for investigating issues. */
85/*#define VBOX_BLKCACHE_TRACING 1*/
86
87
88/*********************************************************************************************************************************
89* Internal Functions *
90*********************************************************************************************************************************/
91
92static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
93 uint64_t off, size_t cbData, uint8_t *pbBuffer);
94static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry);
95
96
97/**
98 * Add message to the VM trace buffer.
99 *
100 * @returns nothing.
101 * @param pBlkCache The block cache.
102 * @param pszFmt The format string.
103 * @param ... Additional parameters for the string formatter.
104 */
105DECLINLINE(void) pdmBlkCacheR3TraceMsgF(PPDMBLKCACHE pBlkCache, const char *pszFmt, ...)
106{
107#if defined(VBOX_BLKCACHE_TRACING)
108 va_list va;
109 va_start(va, pszFmt);
110 RTTraceBufAddMsgV(pBlkCache->pCache->pVM->CTX_SUFF(hTraceBuf), pszFmt, va);
111 va_end(va);
112#else
113 RT_NOREF2(pBlkCache, pszFmt);
114#endif
115}
116
117/**
118 * Decrement the reference counter of the given cache entry.
119 *
120 * @returns nothing.
121 * @param pEntry The entry to release.
122 */
123DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry)
124{
125 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
126 ASMAtomicDecU32(&pEntry->cRefs);
127}
128
129/**
130 * Increment the reference counter of the given cache entry.
131 *
132 * @returns nothing.
133 * @param pEntry The entry to reference.
134 */
135DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry)
136{
137 ASMAtomicIncU32(&pEntry->cRefs);
138}
139
140#ifdef VBOX_STRICT
141static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache)
142{
143 /* Amount of cached data should never exceed the maximum amount. */
144 AssertMsg(pCache->cbCached <= pCache->cbMax,
145 ("Current amount of cached data exceeds maximum\n"));
146
147 /* The amount of cached data in the recently-used-in and frequently-used lists should add up to cbCached. */
148 AssertMsg(pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached == pCache->cbCached,
149 ("Amount of cached data doesn't match\n"));
150
151 AssertMsg(pCache->LruRecentlyUsedOut.cbCached <= pCache->cbRecentlyUsedOutMax,
152 ("Paged out list exceeds maximum\n"));
153}
154#endif
155
156DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache)
157{
158 RTCritSectEnter(&pCache->CritSect);
159#ifdef VBOX_STRICT
160 pdmBlkCacheValidate(pCache);
161#endif
162}
163
164DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache)
165{
166#ifdef VBOX_STRICT
167 pdmBlkCacheValidate(pCache);
168#endif
169 RTCritSectLeave(&pCache->CritSect);
170}
171
172DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
173{
174 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
175 pCache->cbCached -= cbAmount;
176}
177
178DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
179{
180 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
181 pCache->cbCached += cbAmount;
182}
183
184DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount)
185{
186 pList->cbCached += cbAmount;
187}
188
189DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount)
190{
191 pList->cbCached -= cbAmount;
192}
193
194#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
195/**
196 * Checks consistency of a LRU list.
197 *
198 * @returns nothing
199 * @param pList The LRU list to check.
200 * @param pNotInList Element which is not allowed to occur in the list.
201 */
202static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList)
203{
204 PPDMBLKCACHEENTRY pCurr = pList->pHead;
205
206 /* Check that there are no double entries and no cycles in the list. */
207 while (pCurr)
208 {
209 PPDMBLKCACHEENTRY pNext = pCurr->pNext;
210
211 while (pNext)
212 {
213 AssertMsg(pCurr != pNext,
214 ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
215 pCurr, pList));
216 pNext = pNext->pNext;
217 }
218
219 AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr));
220
221 if (!pCurr->pNext)
222 AssertMsg(pCurr == pList->pTail, ("End of list reached but last element is not list tail\n"));
223
224 pCurr = pCurr->pNext;
225 }
226}
227#endif
228
229/**
230 * Unlinks a cache entry from the LRU list it is assigned to.
231 *
232 * @returns nothing.
233 * @param pEntry The entry to unlink.
234 */
235static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry)
236{
237 PPDMBLKLRULIST pList = pEntry->pList;
238 PPDMBLKCACHEENTRY pPrev, pNext;
239
240 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
241
242 AssertPtr(pList);
243
244#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
245 pdmBlkCacheCheckList(pList, NULL);
246#endif
247
248 pPrev = pEntry->pPrev;
249 pNext = pEntry->pNext;
250
251 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
252 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
253
254 if (pPrev)
255 pPrev->pNext = pNext;
256 else
257 {
258 pList->pHead = pNext;
259
260 if (pNext)
261 pNext->pPrev = NULL;
262 }
263
264 if (pNext)
265 pNext->pPrev = pPrev;
266 else
267 {
268 pList->pTail = pPrev;
269
270 if (pPrev)
271 pPrev->pNext = NULL;
272 }
273
274 pEntry->pList = NULL;
275 pEntry->pPrev = NULL;
276 pEntry->pNext = NULL;
277 pdmBlkCacheListSub(pList, pEntry->cbData);
278#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
279 pdmBlkCacheCheckList(pList, pEntry);
280#endif
281}
282
283/**
284 * Adds a cache entry to the given LRU list unlinking it from the currently
285 * assigned list if needed.
286 *
287 * @returns nothing.
288 * @param pList List to add the entry to.
289 * @param pEntry Entry to add.
290 */
291static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry)
292{
293 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
294#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
295 pdmBlkCacheCheckList(pList, NULL);
296#endif
297
298 /* Remove from old list if needed */
299 if (pEntry->pList)
300 pdmBlkCacheEntryRemoveFromList(pEntry);
301
302 pEntry->pNext = pList->pHead;
303 if (pList->pHead)
304 pList->pHead->pPrev = pEntry;
305 else
306 {
307 Assert(!pList->pTail);
308 pList->pTail = pEntry;
309 }
310
311 pEntry->pPrev = NULL;
312 pList->pHead = pEntry;
313 pdmBlkCacheListAdd(pList, pEntry->cbData);
314 pEntry->pList = pList;
315#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
316 pdmBlkCacheCheckList(pList, NULL);
317#endif
318}
319
320/**
321 * Destroys a LRU list freeing all entries.
322 *
323 * @returns nothing
324 * @param pList Pointer to the LRU list to destroy.
325 *
326 * @note The caller must own the critical section of the cache.
327 */
328static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList)
329{
330 while (pList->pHead)
331 {
332 PPDMBLKCACHEENTRY pEntry = pList->pHead;
333
334 pList->pHead = pEntry->pNext;
335
336 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
337 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
338
339 RTMemPageFree(pEntry->pbData, pEntry->cbData);
340 RTMemFree(pEntry);
341 }
342}
343
344/**
345 * Tries to remove the given amount of bytes from a given list in the cache,
346 * moving the entries to the given ghost list.
347 *
348 * @returns Amount of data which could be freed.
349 * @param pCache Pointer to the global cache data.
350 * @param cbData The amount of data to free.
351 * @param pListSrc The source list to evict data from.
352 * @param pGhostListDst The ghost list removed entries should be moved to,
353 * NULL if the entries should be freed.
354 * @param fReuseBuffer Flag whether a buffer should be reused if it has
355 * the same size
356 * @param ppbBuffer Where to store the address of the buffer if an
357 * entry with the same size was found and
358 * fReuseBuffer is true.
359 *
360 * @note This function may return fewer bytes than requested because entries
361 * may be marked as non evictable if they are used for I/O at the
362 * moment.
363 */
364static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData,
365 PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst,
366 bool fReuseBuffer, uint8_t **ppbBuffer)
367{
368 size_t cbEvicted = 0;
369
370 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
371
372 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
373 AssertMsg( !pGhostListDst
374 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
375 ("Destination list must be NULL or the recently used but paged out list\n"));
376
377 if (fReuseBuffer)
378 {
379 AssertPtr(ppbBuffer);
380 *ppbBuffer = NULL;
381 }
382
383 /* Start deleting from the tail. */
384 PPDMBLKCACHEENTRY pEntry = pListSrc->pTail;
385
386 while ((cbEvicted < cbData) && pEntry)
387 {
388 PPDMBLKCACHEENTRY pCurr = pEntry;
389
390 pEntry = pEntry->pPrev;
391
392 /* We can't evict entries which are marked as not evictable (dirty or with I/O in progress) or which are still referenced. */
393 if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
394 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
395 {
396 /* Ok eviction candidate. Grab the endpoint semaphore and check again
397 * because somebody else might have raced us. */
398 PPDMBLKCACHE pBlkCache = pCurr->pBlkCache;
399 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
400
401 if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
402 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
403 {
404 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
405
406 if (fReuseBuffer && pCurr->cbData == cbData)
407 {
408 STAM_COUNTER_INC(&pCache->StatBuffersReused);
409 *ppbBuffer = pCurr->pbData;
410 }
411 else if (pCurr->pbData)
412 RTMemPageFree(pCurr->pbData, pCurr->cbData);
413
414 pCurr->pbData = NULL;
415 cbEvicted += pCurr->cbData;
416
417 pdmBlkCacheEntryRemoveFromList(pCurr);
418 pdmBlkCacheSub(pCache, pCurr->cbData);
419
420 if (pGhostListDst)
421 {
422 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
423
424 PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail;
425
426 /* We have to remove the last entries from the paged out list. */
427 while ( pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax
428 && pGhostEntFree)
429 {
430 PPDMBLKCACHEENTRY pFree = pGhostEntFree;
431 PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache;
432
433 pGhostEntFree = pGhostEntFree->pPrev;
434
435 RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
436
437 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
438 {
439 pdmBlkCacheEntryRemoveFromList(pFree);
440
441 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
442 RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key);
443 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
444
445 RTMemFree(pFree);
446 }
447
448 RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries);
449 }
450
451 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
452 {
453 /* Couldn't remove enough entries. Delete */
454 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
455 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
456 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
457
458 RTMemFree(pCurr);
459 }
460 else
461 pdmBlkCacheEntryAddToList(pGhostListDst, pCurr);
462 }
463 else
464 {
465 /* Delete the entry from the AVL tree it is assigned to. */
466 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
467 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
468 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
469
470 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
471 RTMemFree(pCurr);
472 }
473 }
474 else
475 {
476 LogFlow(("Someone raced us, entry %#p (%u bytes) cannot be evicted any more (fFlags=%#x cRefs=%#x)\n",
477 pCurr, pCurr->cbData, pCurr->fFlags, pCurr->cRefs));
478 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
479 }
480
481 }
482 else
483 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
484 }
485
486 return cbEvicted;
487}
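
/*
 * Note: entries which are moved to the ghost list above keep their AVL tree
 * node and key range, but they no longer own a data buffer (pbData is freed,
 * or handed out for reuse, and set to NULL).  They therefore only record that
 * the range was cached recently and take up almost no memory.
 */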
488
489static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
490{
491 size_t cbRemoved = 0;
492
493 if ((pCache->cbCached + cbData) < pCache->cbMax)
494 return true;
495 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
496 {
497 /* Try to evict as many bytes as possible from A1in */
498 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
499 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
500
501 /*
502 * If it was not possible to remove enough entries
503 * try the frequently accessed cache.
504 */
505 if (cbRemoved < cbData)
506 {
507 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but didn't free enough data. */
508
509 /*
510 * If we already removed something we can't pass the reuse buffer flag on
511 * because the remaining amount to evict no longer matches the buffer size.
512 */
513 if (!cbRemoved)
514 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
515 NULL, fReuseBuffer, ppbBuffer);
516 else
517 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
518 NULL, false, NULL);
519 }
520 }
521 else
522 {
523 /* We have to remove entries from the frequently used list. */
524 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
525 NULL, fReuseBuffer, ppbBuffer);
526 }
527
528 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
529 return (cbRemoved >= cbData);
530}
531
532DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbXfer, PPDMBLKCACHEIOXFER pIoXfer)
533{
534 int rc = VINF_SUCCESS;
535
536 LogFlowFunc(("%s: Enqueuing hIoXfer=%#p enmXferDir=%d\n",
537 __FUNCTION__, pIoXfer, pIoXfer->enmXferDir));
538
539 ASMAtomicIncU32(&pBlkCache->cIoXfersActive);
540 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: I/O req %#p (%RTbool , %d) queued (%u now active)",
541 pIoXfer, pIoXfer->fIoCache, pIoXfer->enmXferDir, pBlkCache->cIoXfersActive);
542
543 switch (pBlkCache->enmType)
544 {
545 case PDMBLKCACHETYPE_DEV:
546 {
547 rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns,
548 pIoXfer->enmXferDir,
549 off, cbXfer,
550 &pIoXfer->SgBuf, pIoXfer);
551 break;
552 }
553 case PDMBLKCACHETYPE_DRV:
554 {
555 rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns,
556 pIoXfer->enmXferDir,
557 off, cbXfer,
558 &pIoXfer->SgBuf, pIoXfer);
559 break;
560 }
561 case PDMBLKCACHETYPE_USB:
562 {
563 rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns,
564 pIoXfer->enmXferDir,
565 off, cbXfer,
566 &pIoXfer->SgBuf, pIoXfer);
567 break;
568 }
569 case PDMBLKCACHETYPE_INTERNAL:
570 {
571 rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser,
572 pIoXfer->enmXferDir,
573 off, cbXfer,
574 &pIoXfer->SgBuf, pIoXfer);
575 break;
576 }
577 default:
578 AssertMsgFailed(("Unknown block cache type!\n"));
579 }
580
581 if (RT_FAILURE(rc))
582 {
583 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: Queueing I/O req %#p failed %Rrc", pIoXfer, rc);
584 ASMAtomicDecU32(&pBlkCache->cIoXfersActive);
585 }
586
587 LogFlowFunc(("%s: returns rc=%Rrc\n", __FUNCTION__, rc));
588 return rc;
589}
590
591/**
592 * Initiates a read I/O task for the given entry.
593 *
594 * @returns VBox status code.
595 * @param pEntry The entry to fetch the data to.
596 */
597static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry)
598{
599 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
600 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
601
602 /* Make sure no one evicts the entry while it is accessed. */
603 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
604
605 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
606 if (RT_UNLIKELY(!pIoXfer))
607 return VERR_NO_MEMORY;
608
609 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
610
611 pIoXfer->fIoCache = true;
612 pIoXfer->pEntry = pEntry;
613 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
614 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
615 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ;
616 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
617
618 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
619}
620
621/**
622 * Initiates a write I/O task for the given entry.
623 *
624 * @returns VBox status code.
625 * @param pEntry The entry to write the data from.
626 */
627static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry)
628{
629 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
630 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
631
632 /* Make sure no one evicts the entry while it is accessed. */
633 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
634
635 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
636 if (RT_UNLIKELY(!pIoXfer))
637 return VERR_NO_MEMORY;
638
639 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
640
641 pIoXfer->fIoCache = true;
642 pIoXfer->pEntry = pEntry;
643 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
644 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
645 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE;
646 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
647
648 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
649}
650
651/**
652 * Passes a part of a request directly through to the I/O manager handling the
653 * endpoint.
654 *
655 * @returns VBox status code.
656 * @param pBlkCache The endpoint cache.
657 * @param pReq The request.
658 * @param pSgBuf The scatter/gather buffer.
659 * @param offStart Offset to start transfer from.
660 * @param cbData Amount of data to transfer.
661 * @param enmXferDir The transfer type (read/write)
662 */
663static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
664 PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData,
665 PDMBLKCACHEXFERDIR enmXferDir)
666{
667
668 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
669 if (RT_UNLIKELY(!pIoXfer))
670 return VERR_NO_MEMORY;
671
672 ASMAtomicIncU32(&pReq->cXfersPending);
673 pIoXfer->fIoCache = false;
674 pIoXfer->pReq = pReq;
675 pIoXfer->enmXferDir = enmXferDir;
676 if (pSgBuf)
677 {
678 RTSgBufClone(&pIoXfer->SgBuf, pSgBuf);
679 RTSgBufAdvance(pSgBuf, cbData);
680 }
681
682 return pdmBlkCacheEnqueue(pBlkCache, offStart, cbData, pIoXfer);
683}
684
685/**
686 * Commit a single dirty entry to the endpoint
687 *
688 * @returns nothing
689 * @param pEntry The entry to commit.
690 */
691static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry)
692{
693 AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)
694 && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
695 ("Invalid flags set for entry %#p\n", pEntry));
696
697 pdmBlkCacheEntryWriteToMedium(pEntry);
698}
699
700/**
701 * Commit all dirty entries for a single endpoint.
702 *
703 * @returns nothing.
704 * @param pBlkCache The endpoint cache to commit.
705 */
706static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache)
707{
708 uint32_t cbCommitted = 0;
709
710 /* Return if the cache was suspended. */
711 if (pBlkCache->fSuspended)
712 return;
713
714 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
715
716 /* The list is moved to a local list header to reduce locking overhead. */
717 RTLISTANCHOR ListDirtyNotCommitted;
718
719 RTSpinlockAcquire(pBlkCache->LockList);
720 RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted);
721 RTSpinlockRelease(pBlkCache->LockList);
722
723 if (!RTListIsEmpty(&ListDirtyNotCommitted))
724 {
725 PPDMBLKCACHEENTRY pEntry = RTListGetFirst(&ListDirtyNotCommitted, PDMBLKCACHEENTRY, NodeNotCommitted);
726
727 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
728 {
729 PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY,
730 NodeNotCommitted);
731 pdmBlkCacheEntryCommit(pEntry);
732 cbCommitted += pEntry->cbData;
733 RTListNodeRemove(&pEntry->NodeNotCommitted);
734 pEntry = pNext;
735 }
736
737 /* Commit the last entry. */
738 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
739 pdmBlkCacheEntryCommit(pEntry);
740 cbCommitted += pEntry->cbData;
741 RTListNodeRemove(&pEntry->NodeNotCommitted);
742 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
743 ("Committed all entries but list is not empty\n"));
744 }
745
746 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
747 AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted,
748 ("Number of committed bytes exceeds number of dirty bytes\n"));
749 uint32_t cbDirtyOld = ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted);
750
751 /* Stop the commit timer if there are no dirty bytes left. */
752 if ( !(cbDirtyOld - cbCommitted)
753 && pBlkCache->pCache->u32CommitTimeoutMs != 0)
754 TMTimerStop(pBlkCache->pCache->pVM, pBlkCache->pCache->hTimerCommit);
755}
756
757/**
758 * Commit all dirty entries in the cache.
759 *
760 * @returns nothing.
761 * @param pCache The global cache instance.
762 */
763static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache)
764{
765 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
766
767 if (!fCommitInProgress)
768 {
769 pdmBlkCacheLockEnter(pCache);
770 Assert(!RTListIsEmpty(&pCache->ListUsers));
771
772 PPDMBLKCACHE pBlkCache = RTListGetFirst(&pCache->ListUsers, PDMBLKCACHE, NodeCacheUser);
773 AssertPtr(pBlkCache);
774
775 while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser))
776 {
777 pdmBlkCacheCommit(pBlkCache);
778
779 pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE,
780 NodeCacheUser);
781 }
782
783 /* Commit the last endpoint */
784 Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser));
785 pdmBlkCacheCommit(pBlkCache);
786
787 pdmBlkCacheLockLeave(pCache);
788 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
789 }
790}
791
792/**
793 * Adds the given entry as dirty to the cache.
794 *
795 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
796 * @param pBlkCache The endpoint cache the entry belongs to.
797 * @param pEntry The entry to add.
798 */
799static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
800{
801 bool fDirtyBytesExceeded = false;
802 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
803
804 /* If the commit timer is disabled we commit right away. */
805 if (pCache->u32CommitTimeoutMs == 0)
806 {
807 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
808 pdmBlkCacheEntryCommit(pEntry);
809 }
810 else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY))
811 {
812 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
813
814 RTSpinlockAcquire(pBlkCache->LockList);
815 RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
816 RTSpinlockRelease(pBlkCache->LockList);
817
818 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
819
820 /* Prevent committing if the VM was suspended. */
821 if (RT_LIKELY(!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended)))
822 fDirtyBytesExceeded = (cbDirty + pEntry->cbData >= pCache->cbCommitDirtyThreshold);
823 else if (!cbDirty && pCache->u32CommitTimeoutMs > 0)
824 {
825 /* Arm the commit timer. */
826 TMTimerSetMillies(pCache->pVM, pCache->hTimerCommit, pCache->u32CommitTimeoutMs);
827 }
828 }
829
830 return fDirtyBytesExceeded;
831}
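
/*
 * Note on the write-back behaviour implemented above: dirty data is committed
 * either immediately (when the commit timer is disabled, i.e. the
 * CacheCommitIntervalMs CFGM value is 0), when the commit timer fires (see
 * pdmBlkCacheCommitTimerCallback), or, as signalled by the return value of
 * pdmBlkCacheAddDirtyEntry, when the caller notices that the amount of dirty
 * bytes crossed the CacheCommitThreshold value.
 */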
832
833static PPDMBLKCACHE pdmR3BlkCacheFindById(PPDMBLKCACHEGLOBAL pBlkCacheGlobal, const char *pcszId)
834{
835 bool fFound = false;
836
837 PPDMBLKCACHE pBlkCache;
838 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
839 {
840 if (!RTStrCmp(pBlkCache->pszId, pcszId))
841 {
842 fFound = true;
843 break;
844 }
845 }
846
847 return fFound ? pBlkCache : NULL;
848}
849
850/**
851 * @callback_method_impl{FNTMTIMERINT, Commit timer callback.}
852 */
853static DECLCALLBACK(void) pdmBlkCacheCommitTimerCallback(PVM pVM, TMTIMERHANDLE hTimer, void *pvUser)
854{
855 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
856 RT_NOREF(pVM, hTimer);
857
858 LogFlowFunc(("Commit interval expired, committing dirty entries\n"));
859
860 if ( ASMAtomicReadU32(&pCache->cbDirty) > 0
861 && !ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
862 pdmBlkCacheCommitDirtyEntries(pCache);
863
864 LogFlowFunc(("Entries committed, going to sleep\n"));
865}
866
867static DECLCALLBACK(int) pdmR3BlkCacheSaveExec(PVM pVM, PSSMHANDLE pSSM)
868{
869 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
870
871 AssertPtr(pBlkCacheGlobal);
872
873 pdmBlkCacheLockEnter(pBlkCacheGlobal);
874
875 SSMR3PutU32(pSSM, pBlkCacheGlobal->cRefs);
876
877 /* Go through the list and save all dirty entries. */
878 PPDMBLKCACHE pBlkCache;
879 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
880 {
881 uint32_t cEntries = 0;
882 PPDMBLKCACHEENTRY pEntry;
883
884 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
885 SSMR3PutU32(pSSM, (uint32_t)strlen(pBlkCache->pszId));
886 SSMR3PutStrZ(pSSM, pBlkCache->pszId);
887
888 /* Count the number of entries to save. */
889 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
890 {
891 cEntries++;
892 }
893
894 SSMR3PutU32(pSSM, cEntries);
895
896 /* Walk the list of all dirty entries and save them. */
897 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
898 {
899 /* A few sanity checks. */
900 AssertMsg(!pEntry->cRefs, ("The entry is still referenced\n"));
901 AssertMsg(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY, ("Entry is not dirty\n"));
902 AssertMsg(!(pEntry->fFlags & ~PDMBLKCACHE_ENTRY_IS_DIRTY), ("Invalid flags set\n"));
903 AssertMsg(!pEntry->pWaitingHead && !pEntry->pWaitingTail, ("There are waiting requests\n"));
904 AssertMsg( pEntry->pList == &pBlkCacheGlobal->LruRecentlyUsedIn
905 || pEntry->pList == &pBlkCacheGlobal->LruFrequentlyUsed,
906 ("Invalid list\n"));
907 AssertMsg(pEntry->cbData == pEntry->Core.KeyLast - pEntry->Core.Key + 1,
908 ("Size and range do not match\n"));
909
910 /* Save */
911 SSMR3PutU64(pSSM, pEntry->Core.Key);
912 SSMR3PutU32(pSSM, pEntry->cbData);
913 SSMR3PutMem(pSSM, pEntry->pbData, pEntry->cbData);
914 }
915
916 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
917 }
918
919 pdmBlkCacheLockLeave(pBlkCacheGlobal);
920
921 /* Terminator */
922 return SSMR3PutU32(pSSM, UINT32_MAX);
923}
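
/*
 * Layout of the saved state unit written above (and read back by
 * pdmR3BlkCacheLoadExec):
 *   u32          number of cache users (cRefs)
 *   per user:
 *     u32        length of the user id (without the terminator)
 *     string     the user id
 *     u32        number of dirty entries
 *     per entry: u64 offset, u32 size, followed by the raw entry data
 *   u32          UINT32_MAX as terminator
 */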
924
925static DECLCALLBACK(int) pdmR3BlkCacheLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
926{
927 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
928 uint32_t cRefs;
929
930 NOREF(uPass);
931 AssertPtr(pBlkCacheGlobal);
932
933 pdmBlkCacheLockEnter(pBlkCacheGlobal);
934
935 if (uVersion != PDM_BLK_CACHE_SAVED_STATE_VERSION)
936 return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION;
937
938 SSMR3GetU32(pSSM, &cRefs);
939
940 /*
941 * Fewer users in the saved state than in the current VM are allowed,
942 * because that only means there are new users which don't have any saved
943 * state that could get lost.
944 * More saved state entries than registered cache users are only allowed if the
945 * missing users don't have any data saved in the cache.
946 */
947 int rc = VINF_SUCCESS;
948 char *pszId = NULL;
949
950 while ( cRefs > 0
951 && RT_SUCCESS(rc))
952 {
953 PPDMBLKCACHE pBlkCache = NULL;
954 uint32_t cbId = 0;
955
956 SSMR3GetU32(pSSM, &cbId);
957 Assert(cbId > 0);
958
959 cbId++; /* Include terminator */
960 pszId = (char *)RTMemAllocZ(cbId * sizeof(char));
961 if (!pszId)
962 {
963 rc = VERR_NO_MEMORY;
964 break;
965 }
966
967 rc = SSMR3GetStrZ(pSSM, pszId, cbId);
968 AssertRC(rc);
969
970 /* Search for the block cache with the provided id. */
971 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pszId);
972
973 /* Get the entries */
974 uint32_t cEntries;
975 SSMR3GetU32(pSSM, &cEntries);
976
977 if (!pBlkCache && (cEntries > 0))
978 {
979 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
980 N_("The VM is missing a block device and there is data in the cache. Please make sure the source and target VMs have compatible storage configurations"));
981 break;
982 }
983
984 RTMemFree(pszId);
985 pszId = NULL;
986
987 while (cEntries > 0)
988 {
989 PPDMBLKCACHEENTRY pEntry;
990 uint64_t off;
991 uint32_t cbEntry;
992
993 SSMR3GetU64(pSSM, &off);
994 SSMR3GetU32(pSSM, &cbEntry);
995
996 pEntry = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, NULL);
997 if (!pEntry)
998 {
999 rc = VERR_NO_MEMORY;
1000 break;
1001 }
1002
1003 rc = SSMR3GetMem(pSSM, pEntry->pbData, cbEntry);
1004 if (RT_FAILURE(rc))
1005 {
1006 RTMemFree(pEntry->pbData);
1007 RTMemFree(pEntry);
1008 break;
1009 }
1010
1011 /* Insert into the tree. */
1012 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1013 Assert(fInserted); NOREF(fInserted);
1014
1015 /* Add to the dirty list. */
1016 pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
1017 pdmBlkCacheEntryAddToList(&pBlkCacheGlobal->LruRecentlyUsedIn, pEntry);
1018 pdmBlkCacheAdd(pBlkCacheGlobal, cbEntry);
1019 pdmBlkCacheEntryRelease(pEntry);
1020 cEntries--;
1021 }
1022
1023 cRefs--;
1024 }
1025
1026 if (pszId)
1027 RTMemFree(pszId);
1028
1029 if (cRefs && RT_SUCCESS(rc))
1030 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
1031 N_("Unexpected error while restoring state. Please make sure the source and target VMs have compatible storage configurations"));
1032
1033 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1034
1035 if (RT_SUCCESS(rc))
1036 {
1037 uint32_t u32 = 0;
1038 rc = SSMR3GetU32(pSSM, &u32);
1039 if (RT_SUCCESS(rc))
1040 AssertMsgReturn(u32 == UINT32_MAX, ("%#x\n", u32), VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
1041 }
1042
1043 return rc;
1044}
1045
1046int pdmR3BlkCacheInit(PVM pVM)
1047{
1048 int rc = VINF_SUCCESS;
1049 PUVM pUVM = pVM->pUVM;
1050 PPDMBLKCACHEGLOBAL pBlkCacheGlobal;
1051
1052 LogFlowFunc((": pVM=%p\n", pVM));
1053
1054 VM_ASSERT_EMT(pVM);
1055
1056 PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
1057 PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache");
1058
1059 pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL));
1060 if (!pBlkCacheGlobal)
1061 return VERR_NO_MEMORY;
1062
1063 RTListInit(&pBlkCacheGlobal->ListUsers);
1064 pBlkCacheGlobal->pVM = pVM;
1065 pBlkCacheGlobal->cRefs = 0;
1066 pBlkCacheGlobal->cbCached = 0;
1067 pBlkCacheGlobal->fCommitInProgress = false;
1068
1069 /* Initialize members */
1070 pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL;
1071 pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL;
1072 pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0;
1073
1074 pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL;
1075 pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL;
1076 pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0;
1077
1078 pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL;
1079 pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL;
1080 pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0;
1081
1082 do
1083 {
1084 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M);
1085 AssertLogRelRCBreak(rc);
1086 LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax));
1087
1088 pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */
1089 pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */
1090 LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n",
1091 pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax));
1092
1093 /** @todo r=aeichner: Experiment to find optimal default values */
1094 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */);
1095 AssertLogRelRCBreak(rc);
1096 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2);
1097 AssertLogRelRCBreak(rc);
1098 } while (0);
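
 /*
  * Note: the three CFGM keys queried above (CacheSize, CacheCommitIntervalMs
  * and CacheCommitThreshold) live under PDM/BlkCache in the configuration
  * tree.  They can usually be overridden per VM through extradata, for
  * example (illustrative, exact key path assumed):
  *   VBoxManage setextradata "<vmname>" VBoxInternal/PDM/BlkCache/CacheSize 10485760
  */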
1099
1100 if (RT_SUCCESS(rc))
1101 {
1102 STAMR3Register(pVM, &pBlkCacheGlobal->cbMax,
1103 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1104 "/PDM/BlkCache/cbMax",
1105 STAMUNIT_BYTES,
1106 "Maximum cache size");
1107 STAMR3Register(pVM, &pBlkCacheGlobal->cbCached,
1108 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1109 "/PDM/BlkCache/cbCached",
1110 STAMUNIT_BYTES,
1111 "Currently used cache");
1112 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached,
1113 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1114 "/PDM/BlkCache/cbCachedMruIn",
1115 STAMUNIT_BYTES,
1116 "Number of bytes cached in MRU list");
1117 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached,
1118 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1119 "/PDM/BlkCache/cbCachedMruOut",
1120 STAMUNIT_BYTES,
1121 "Number of bytes cached in FRU list");
1122 STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached,
1123 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1124 "/PDM/BlkCache/cbCachedFru",
1125 STAMUNIT_BYTES,
1126 "Number of bytes cached in FRU ghost list");
1127
1128#ifdef VBOX_WITH_STATISTICS
1129 STAMR3Register(pVM, &pBlkCacheGlobal->cHits,
1130 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1131 "/PDM/BlkCache/CacheHits",
1132 STAMUNIT_COUNT, "Number of hits in the cache");
1133 STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits,
1134 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1135 "/PDM/BlkCache/CachePartialHits",
1136 STAMUNIT_COUNT, "Number of partial hits in the cache");
1137 STAMR3Register(pVM, &pBlkCacheGlobal->cMisses,
1138 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1139 "/PDM/BlkCache/CacheMisses",
1140 STAMUNIT_COUNT, "Number of misses when accessing the cache");
1141 STAMR3Register(pVM, &pBlkCacheGlobal->StatRead,
1142 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1143 "/PDM/BlkCache/CacheRead",
1144 STAMUNIT_BYTES, "Number of bytes read from the cache");
1145 STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten,
1146 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1147 "/PDM/BlkCache/CacheWritten",
1148 STAMUNIT_BYTES, "Number of bytes written to the cache");
1149 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet,
1150 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1151 "/PDM/BlkCache/CacheTreeGet",
1152 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
1153 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert,
1154 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1155 "/PDM/BlkCache/CacheTreeInsert",
1156 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
1157 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove,
1158 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1159 "/PDM/BlkCache/CacheTreeRemove",
1160 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry from the tree");
1161 STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused,
1162 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1163 "/PDM/BlkCache/CacheBuffersReused",
1164 STAMUNIT_COUNT, "Number of times a buffer could be reused");
1165#endif
1166
1167 /* Initialize the critical section */
1168 rc = RTCritSectInit(&pBlkCacheGlobal->CritSect);
1169 }
1170
1171 if (RT_SUCCESS(rc))
1172 {
1173 /* Create the commit timer */
1174 if (pBlkCacheGlobal->u32CommitTimeoutMs > 0)
1175 rc = TMR3TimerCreate(pVM, TMCLOCK_REAL, pdmBlkCacheCommitTimerCallback, pBlkCacheGlobal,
1176 TMTIMER_FLAGS_NO_RING0, "BlkCache-Commit", &pBlkCacheGlobal->hTimerCommit);
1177
1178 if (RT_SUCCESS(rc))
1179 {
1180 /* Register saved state handler. */
1181 rc = SSMR3RegisterInternal(pVM, "pdmblkcache", 0, PDM_BLK_CACHE_SAVED_STATE_VERSION, pBlkCacheGlobal->cbMax,
1182 NULL, NULL, NULL,
1183 NULL, pdmR3BlkCacheSaveExec, NULL,
1184 NULL, pdmR3BlkCacheLoadExec, NULL);
1185 if (RT_SUCCESS(rc))
1186 {
1187 LogRel(("BlkCache: Cache successfully initialized. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax));
1188 LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs));
1189 LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold));
1190 pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal;
1191 return VINF_SUCCESS;
1192 }
1193 }
1194
1195 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1196 }
1197
1198 if (pBlkCacheGlobal)
1199 RTMemFree(pBlkCacheGlobal);
1200
1201 LogFlowFunc((": returns rc=%Rrc\n", rc));
1202 return rc;
1203}
1204
1205void pdmR3BlkCacheTerm(PVM pVM)
1206{
1207 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1208
1209 if (pBlkCacheGlobal)
1210 {
1211 /* Make sure no one else uses the cache now */
1212 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1213
1214 /* Clean up, deleting all cache entries and waiting for in-progress entries to finish. */
1215 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn);
1216 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut);
1217 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed);
1218
1219 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1220
1221 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1222 RTMemFree(pBlkCacheGlobal);
1223 pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL;
1224 }
1225}
1226
1227int pdmR3BlkCacheResume(PVM pVM)
1228{
1229 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1230
1231 LogFlowFunc(("pVM=%#p\n", pVM));
1232
1233 if ( pBlkCacheGlobal
1234 && ASMAtomicXchgBool(&pBlkCacheGlobal->fIoErrorVmSuspended, false))
1235 {
1236 /* The VM was suspended because of an I/O error, commit all dirty entries. */
1237 pdmBlkCacheCommitDirtyEntries(pBlkCacheGlobal);
1238 }
1239
1240 return VINF_SUCCESS;
1241}
1242
1243static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId)
1244{
1245 int rc = VINF_SUCCESS;
1246 PPDMBLKCACHE pBlkCache = NULL;
1247 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1248
1249 if (!pBlkCacheGlobal)
1250 return VERR_NOT_SUPPORTED;
1251
1252 /*
1253 * Check first that no other cache user has the same id.
1254 * Unique ids are necessary in case the state is saved.
1255 */
1256 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1257
1258 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pcszId);
1259
1260 if (!pBlkCache)
1261 {
1262 pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE));
1263
1264 if (pBlkCache)
1265 pBlkCache->pszId = RTStrDup(pcszId);
1266
1267 if ( pBlkCache
1268 && pBlkCache->pszId)
1269 {
1270 pBlkCache->fSuspended = false;
1271 pBlkCache->cIoXfersActive = 0;
1272 pBlkCache->pCache = pBlkCacheGlobal;
1273 RTListInit(&pBlkCache->ListDirtyNotCommitted);
1274
1275 rc = RTSpinlockCreate(&pBlkCache->LockList, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "pdmR3BlkCacheRetain");
1276 if (RT_SUCCESS(rc))
1277 {
1278 rc = RTSemRWCreate(&pBlkCache->SemRWEntries);
1279 if (RT_SUCCESS(rc))
1280 {
1281 pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1282 if (pBlkCache->pTree)
1283 {
1284#ifdef VBOX_WITH_STATISTICS
1285 STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred,
1286 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1287 STAMUNIT_COUNT, "Number of deferred writes",
1288 "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1289#endif
1290
1291 /* Add to the list of users. */
1292 pBlkCacheGlobal->cRefs++;
1293 RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser);
1294 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1295
1296 *ppBlkCache = pBlkCache;
1297 LogFlowFunc(("returns success\n"));
1298 return VINF_SUCCESS;
1299 }
1300
1301 rc = VERR_NO_MEMORY;
1302 RTSemRWDestroy(pBlkCache->SemRWEntries);
1303 }
1304
1305 RTSpinlockDestroy(pBlkCache->LockList);
1306 }
1307
1308 RTStrFree(pBlkCache->pszId);
1309 }
1310 else
1311 rc = VERR_NO_MEMORY;
1312
1313 if (pBlkCache)
1314 RTMemFree(pBlkCache);
1315 }
1316 else
1317 rc = VERR_ALREADY_EXISTS;
1318
1319 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1320
1321 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1322 return rc;
1323}
1324
1325VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache,
1326 PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete,
1327 PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue,
1328 PFNPDMBLKCACHEXFERENQUEUEDISCARDDRV pfnXferEnqueueDiscard,
1329 const char *pcszId)
1330{
1331 int rc = VINF_SUCCESS;
1332 PPDMBLKCACHE pBlkCache;
1333
1334 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1335 if (RT_SUCCESS(rc))
1336 {
1337 pBlkCache->enmType = PDMBLKCACHETYPE_DRV;
1338 pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete;
1339 pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue;
1340 pBlkCache->u.Drv.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1341 pBlkCache->u.Drv.pDrvIns = pDrvIns;
1342 *ppBlkCache = pBlkCache;
1343 }
1344
1345 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1346 return rc;
1347}
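
/*
 * Illustrative usage sketch (no such caller exists in this file; the instance
 * and callback names are made up): a storage driver typically retains a cache
 * user during construction and releases it again on destruction:
 *
 *     PPDMBLKCACHE pBlkCache;
 *     int rc = PDMR3BlkCacheRetainDriver(pVM, pDrvIns, &pBlkCache,
 *                                        drvXferCompleted, drvXferEnqueue,
 *                                        drvXferEnqueueDiscard, "MyDisk");
 *     if (RT_SUCCESS(rc))
 *         ... queue reads and writes through the cache ...
 *     ...
 *     PDMR3BlkCacheRelease(pBlkCache);
 */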
1348
1349VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache,
1350 PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete,
1351 PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue,
1352 PFNPDMBLKCACHEXFERENQUEUEDISCARDDEV pfnXferEnqueueDiscard,
1353 const char *pcszId)
1354{
1355 int rc = VINF_SUCCESS;
1356 PPDMBLKCACHE pBlkCache;
1357
1358 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1359 if (RT_SUCCESS(rc))
1360 {
1361 pBlkCache->enmType = PDMBLKCACHETYPE_DEV;
1362 pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete;
1363 pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue;
1364 pBlkCache->u.Dev.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1365 pBlkCache->u.Dev.pDevIns = pDevIns;
1366 *ppBlkCache = pBlkCache;
1367 }
1368
1369 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1370 return rc;
1371
1372}
1373
1374VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache,
1375 PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete,
1376 PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue,
1377 PFNPDMBLKCACHEXFERENQUEUEDISCARDUSB pfnXferEnqueueDiscard,
1378 const char *pcszId)
1379{
1380 int rc = VINF_SUCCESS;
1381 PPDMBLKCACHE pBlkCache;
1382
1383 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1384 if (RT_SUCCESS(rc))
1385 {
1386 pBlkCache->enmType = PDMBLKCACHETYPE_USB;
1387 pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete;
1388 pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue;
1389 pBlkCache->u.Usb.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1390 pBlkCache->u.Usb.pUsbIns = pUsbIns;
1391 *ppBlkCache = pBlkCache;
1392 }
1393
1394 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1395 return rc;
1396
1397}
1398
1399VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache,
1400 PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete,
1401 PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue,
1402 PFNPDMBLKCACHEXFERENQUEUEDISCARDINT pfnXferEnqueueDiscard,
1403 const char *pcszId)
1404{
1405 int rc = VINF_SUCCESS;
1406 PPDMBLKCACHE pBlkCache;
1407
1408 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1409 if (RT_SUCCESS(rc))
1410 {
1411 pBlkCache->enmType = PDMBLKCACHETYPE_INTERNAL;
1412 pBlkCache->u.Int.pfnXferComplete = pfnXferComplete;
1413 pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue;
1414 pBlkCache->u.Int.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1415 pBlkCache->u.Int.pvUser = pvUser;
1416 *ppBlkCache = pBlkCache;
1417 }
1418
1419 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1420 return rc;
1421
1422}
1423
1424/**
1425 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1426 *
1427 * @returns IPRT status code.
1428 * @param pNode The node to destroy.
1429 * @param pvUser Opaque user data.
1430 */
1431static DECLCALLBACK(int) pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser)
1432{
1433 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
1434 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
1435 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
1436
1437 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
1438 {
1439 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
1440 pdmBlkCacheEntryRef(pEntry);
1441 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1442 pdmBlkCacheLockLeave(pCache);
1443
1444 RTThreadSleep(250);
1445
1446 /* Re-enter all locks */
1447 pdmBlkCacheLockEnter(pCache);
1448 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1449 pdmBlkCacheEntryRelease(pEntry);
1450 }
1451
1452 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
1453 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1454
1455 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1456 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1457
1458 pdmBlkCacheEntryRemoveFromList(pEntry);
1459
1460 if (fUpdateCache)
1461 pdmBlkCacheSub(pCache, pEntry->cbData);
1462
1463 RTMemPageFree(pEntry->pbData, pEntry->cbData);
1464 RTMemFree(pEntry);
1465
1466 return VINF_SUCCESS;
1467}
1468
1469VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache)
1470{
1471 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1472
1473 /*
1474 * Commit all dirty entries now (they are waited on for completion during the
1475 * destruction of the AVL tree below).
1476 * The exception is if the VM was paused because of an I/O error before.
1477 */
1478 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
1479 pdmBlkCacheCommit(pBlkCache);
1480
1481 /* Make sure nobody is accessing the cache while we delete the tree. */
1482 pdmBlkCacheLockEnter(pCache);
1483 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1484 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
1485 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1486
1487 RTSpinlockDestroy(pBlkCache->LockList);
1488
1489 pCache->cRefs--;
1490 RTListNodeRemove(&pBlkCache->NodeCacheUser);
1491
1492 pdmBlkCacheLockLeave(pCache);
1493
1494 RTMemFree(pBlkCache->pTree);
1495 pBlkCache->pTree = NULL;
1496 RTSemRWDestroy(pBlkCache->SemRWEntries);
1497
1498#ifdef VBOX_WITH_STATISTICS
1499 STAMR3DeregisterF(pCache->pVM->pUVM, "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1500#endif
1501
1502 RTStrFree(pBlkCache->pszId);
1503 RTMemFree(pBlkCache);
1504}
1505
1506VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns)
1507{
1508 LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns));
1509
1510 /*
1511 * Validate input.
1512 */
1513 if (!pDevIns)
1514 return;
1515 VM_ASSERT_EMT(pVM);
1516
1517 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1518 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1519
1520 /* Return silently if not supported. */
1521 if (!pBlkCacheGlobal)
1522 return;
1523
1524 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1525
1526 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1527 {
1528 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV
1529 && pBlkCache->u.Dev.pDevIns == pDevIns)
1530 PDMR3BlkCacheRelease(pBlkCache);
1531 }
1532
1533 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1534}
1535
1536VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns)
1537{
1538 LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns));
1539
1540 /*
1541 * Validate input.
1542 */
1543 if (!pDrvIns)
1544 return;
1545 VM_ASSERT_EMT(pVM);
1546
1547 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1548 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1549
1550 /* Return silently if not supported. */
1551 if (!pBlkCacheGlobal)
1552 return;
1553
1554 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1555
1556 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1557 {
1558 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV
1559 && pBlkCache->u.Drv.pDrvIns == pDrvIns)
1560 PDMR3BlkCacheRelease(pBlkCache);
1561 }
1562
1563 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1564}
1565
1566VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns)
1567{
1568 LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns));
1569
1570 /*
1571 * Validate input.
1572 */
1573 if (!pUsbIns)
1574 return;
1575 VM_ASSERT_EMT(pVM);
1576
1577 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1578 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1579
1580 /* Return silently if not supported. */
1581 if (!pBlkCacheGlobal)
1582 return;
1583
1584 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1585
1586 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1587 {
1588 if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB
1589 && pBlkCache->u.Usb.pUsbIns == pUsbIns)
1590 PDMR3BlkCacheRelease(pBlkCache);
1591 }
1592
1593 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1594}
1595
1596static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off)
1597{
1598 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1599
1600 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1601 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off);
1602 if (pEntry)
1603 pdmBlkCacheEntryRef(pEntry);
1604 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1605
1606 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1607
1608 return pEntry;
1609}
1610
1611/**
1612 * Return the best fit cache entry above the given offset.
1613 *
1614 * @returns nothing.
1615 * @param pBlkCache The endpoint cache.
1616 * @param off The offset.
1617 * @param ppEntryAbove Where to store the pointer to the best fit entry above
1618 * the given offset. NULL if not required.
1619 */
1620static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off, PPDMBLKCACHEENTRY *ppEntryAbove)
1621{
1622 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1623
1624 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1625 if (ppEntryAbove)
1626 {
1627 *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/);
1628 if (*ppEntryAbove)
1629 pdmBlkCacheEntryRef(*ppEntryAbove);
1630 }
1631
1632 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1633
1634 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1635}
1636
1637static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
1638{
1639 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeInsert, Cache);
1640 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1641 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1642 AssertMsg(fInserted, ("Node was not inserted into tree\n")); NOREF(fInserted);
1643 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeInsert, Cache);
1644 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1645}
1646
1647/**
1648 * Allocates and initializes a new entry for the cache.
1649 * The entry has a reference count of 1.
1650 *
1651 * @returns Pointer to the new cache entry or NULL if out of memory.
1652 * @param pBlkCache The cache the entry belongs to.
1653 * @param off Start offset.
1654 * @param cbData Size of the cache entry.
1655 * @param pbBuffer Pointer to the buffer to use.
1656 * NULL if a new buffer should be allocated.
1657 * The buffer needs to have the same size as the entry.
1658 */
1659static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbData, uint8_t *pbBuffer)
1660{
1661 AssertReturn(cbData <= UINT32_MAX, NULL);
1662 PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY));
1663
1664 if (RT_UNLIKELY(!pEntryNew))
1665 return NULL;
1666
1667 pEntryNew->Core.Key = off;
1668 pEntryNew->Core.KeyLast = off + cbData - 1;
1669 pEntryNew->pBlkCache = pBlkCache;
1670 pEntryNew->fFlags = 0;
1671 pEntryNew->cRefs = 1; /* We are using it now. */
1672 pEntryNew->pList = NULL;
1673 pEntryNew->cbData = (uint32_t)cbData;
1674 pEntryNew->pWaitingHead = NULL;
1675 pEntryNew->pWaitingTail = NULL;
1676 if (pbBuffer)
1677 pEntryNew->pbData = pbBuffer;
1678 else
1679 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1680
1681 if (RT_UNLIKELY(!pEntryNew->pbData))
1682 {
1683 RTMemFree(pEntryNew);
1684 return NULL;
1685 }
1686
1687 return pEntryNew;
1688}
1689
1690/**
1691 * Checks that one flag is set and another is clear, acquiring the R/W
1692 * semaphore in exclusive mode if the check succeeds.
1693 *
1694 * @returns true if the flag in fSet is set and the one in fClear is clear.
1695 * false otherwise.
1696 * The R/W semaphore is only held if true is returned.
1697 *
1698 * @param pBlkCache The endpoint cache instance data.
1699 * @param pEntry The entry to check the flags for.
1700 * @param fSet The flag which is tested to be set.
1701 * @param fClear The flag which is tested to be clear.
1702 */
1703DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache,
1704 PPDMBLKCACHEENTRY pEntry,
1705 uint32_t fSet, uint32_t fClear)
1706{
1707 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1708 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1709
1710 if (fPassed)
1711 {
1712 /* Acquire the lock and check again because the completion callback might have raced us. */
1713 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1714
1715 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1716 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1717
1718 /* Drop the lock if we didn't pass the test. */
1719 if (!fPassed)
1720 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1721 }
1722
1723 return fPassed;
1724}
1725
1726/**
1727 * Adds a segment to the waiting list for a cache entry
1728 * which is currently in progress.
1729 *
1730 * @returns nothing.
1731 * @param pEntry The cache entry to add the segment to.
1732 * @param pWaiter The waiter entry to add.
1733 */
1734DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry,
1735 PPDMBLKCACHEWAITER pWaiter)
1736{
1737 pWaiter->pNext = NULL;
1738
1739 if (pEntry->pWaitingHead)
1740 {
1741 AssertPtr(pEntry->pWaitingTail);
1742
1743 pEntry->pWaitingTail->pNext = pWaiter;
1744 pEntry->pWaitingTail = pWaiter;
1745 }
1746 else
1747 {
1748 Assert(!pEntry->pWaitingTail);
1749
1750 pEntry->pWaitingHead = pWaiter;
1751 pEntry->pWaitingTail = pWaiter;
1752 }
1753}
1754
1755/**
1756 * Add a buffer described by the I/O memory context
1757 * to the entry waiting for completion.
1758 *
1759 * @returns VBox status code.
1760 * @param pEntry The entry to add the buffer to.
1761 * @param pReq The request.
1762 * @param pSgBuf The scatter/gather buffer. Will be advanced by cbData.
1763 * @param offDiff Offset from the start of the buffer in the entry.
1764 * @param cbData Amount of data to wait for on this entry.
1765 * @param fWrite Flag whether the task waits because it wants to write to
1766 * the cache entry.
1767 */
1768static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry, PPDMBLKCACHEREQ pReq,
1769 PRTSGBUF pSgBuf, uint64_t offDiff, size_t cbData, bool fWrite)
1770{
1771 PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER));
1772 if (!pWaiter)
1773 return VERR_NO_MEMORY;
1774
1775 ASMAtomicIncU32(&pReq->cXfersPending);
1776 pWaiter->pReq = pReq;
1777 pWaiter->offCacheEntry = offDiff;
1778 pWaiter->cbTransfer = cbData;
1779 pWaiter->fWrite = fWrite;
1780 RTSgBufClone(&pWaiter->SgBuf, pSgBuf);
1781 RTSgBufAdvance(pSgBuf, cbData);
1782
1783 pdmBlkCacheEntryAddWaiter(pEntry, pWaiter);
1784
1785 return VINF_SUCCESS;
1786}
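
/*
 * Editor's sketch (illustrative only, not part of the original source):
 * deferring the first 4 KiB of a request on an entry whose I/O is still in
 * flight.  The caller's scatter/gather buffer is advanced by the deferred
 * amount as a side effect, so the caller simply continues with the remainder
 * of the request afterwards.  pEntry, pReq and pSgBuf are assumed valid.
 */
static int pdmBlkCacheExampleDeferOnEntry(PPDMBLKCACHEENTRY pEntry, PPDMBLKCACHEREQ pReq, PRTSGBUF pSgBuf)
{
    return pdmBlkCacheEntryWaitersAdd(pEntry, pReq, pSgBuf, 0 /*offDiff*/, _4K /*cbData*/, false /*fWrite*/);
}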
1787
1788/**
1789 * Calculates the aligned offset and size for a new cache entry so that it
1790 * neither intersects with an already existing entry nor extends past the file end.
1791 *
1792 * @returns The number of bytes the entry can hold of the requested amount
1793 * of bytes.
1794 * @param pBlkCache The endpoint cache.
1795 * @param off The start offset.
1796 * @param cb The number of bytes the entry needs to hold at
1797 * least.
1798 * @param pcbEntry Where to store the number of bytes the entry can hold.
1799 * Can be less than given because of other entries.
1800 */
1801static uint32_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache,
1802 uint64_t off, uint32_t cb,
1803 uint32_t *pcbEntry)
1804{
1805 /* Get the best fit entries around the offset */
1806 PPDMBLKCACHEENTRY pEntryAbove = NULL;
1807 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
1808
1809 /* Log the info */
1810 LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1811 pEntryAbove ? "B" : "No b",
1812 off,
1813 pEntryAbove ? pEntryAbove->Core.Key : 0,
1814 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1815 pEntryAbove ? pEntryAbove->cbData : 0));
1816
1817 uint32_t cbNext;
1818 uint32_t cbInEntry;
1819 if ( pEntryAbove
1820 && off + cb > pEntryAbove->Core.Key)
1821 {
1822 cbInEntry = (uint32_t)(pEntryAbove->Core.Key - off);
1823 cbNext = (uint32_t)(pEntryAbove->Core.Key - off);
1824 }
1825 else
1826 {
1827 cbInEntry = cb;
1828 cbNext = cb;
1829 }
1830
1831 /* A few sanity checks */
1832 AssertMsg(!pEntryAbove || off + cbNext <= pEntryAbove->Core.Key,
1833 ("Aligned size intersects with another cache entry\n"));
1834 Assert(cbInEntry <= cbNext);
1835
1836 if (pEntryAbove)
1837 pdmBlkCacheEntryRelease(pEntryAbove);
1838
1839 LogFlow(("off=%llu cbNext=%u\n", off, cbNext));
1840
1841 *pcbEntry = cbNext;
1842
1843 return cbInEntry;
1844}
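
/*
 * Editor's worked example (illustrative only, not part of the original source):
 * assume an existing entry at keys [131072, 196607] (64 KiB starting at 128 KiB)
 * and a request with off=65536, cb=131072.  The best-fit lookup returns that
 * entry as pEntryAbove; since off + cb (196608) reaches past its key, both
 * cbNext (*pcbEntry) and the returned cbInEntry are clipped to
 * 131072 - 65536 = 65536, so the new entry ends right where the existing one
 * begins.  Without an overlapping entry above, both values would stay at cb.
 */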
1845
1846/**
1847 * Create a new cache entry evicting data from the cache if required.
1848 *
1849 * @returns Pointer to the new cache entry or NULL
1850 * if not enough bytes could be evicted from the cache.
1851 * @param pBlkCache The endpoint cache.
1852 * @param off The offset.
1853 * @param cb Number of bytes the cache entry should have.
1854 * @param pcbData Where to store the number of bytes the new
1855 * entry can hold. May be lower than actually
1856 * requested due to another entry intersecting the
1857 * access range.
1858 */
1859static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cb, size_t *pcbData)
1860{
1861 uint32_t cbEntry = 0;
1862
1863 *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, (uint32_t)cb, &cbEntry);
1864 AssertReturn(cb <= UINT32_MAX, NULL);
1865
1866 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1867 pdmBlkCacheLockEnter(pCache);
1868
1869 PPDMBLKCACHEENTRY pEntryNew = NULL;
1870 uint8_t *pbBuffer = NULL;
1871 bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1872 if (fEnough)
1873 {
1874 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1875
1876 pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, pbBuffer);
1877 if (RT_LIKELY(pEntryNew))
1878 {
1879 pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1880 pdmBlkCacheAdd(pCache, cbEntry);
1881 pdmBlkCacheLockLeave(pCache);
1882
1883 pdmBlkCacheInsertEntry(pBlkCache, pEntryNew);
1884
1885 AssertMsg( (off >= pEntryNew->Core.Key)
1886 && (off + *pcbData <= pEntryNew->Core.KeyLast + 1),
1887 ("Overflow in calculation off=%llu\n", off));
1888 }
1889 else
1890 pdmBlkCacheLockLeave(pCache);
1891 }
1892 else
1893 pdmBlkCacheLockLeave(pCache);
1894
1895 return pEntryNew;
1896}
1897
1898static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(void *pvUser)
1899{
1900 PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ));
1901
1902 if (RT_LIKELY(pReq))
1903 {
1904 pReq->pvUser = pvUser;
1905 pReq->rcReq = VINF_SUCCESS;
1906 pReq->cXfersPending = 0;
1907 }
1908
1909 return pReq;
1910}
1911
1912static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq)
1913{
1914 switch (pBlkCache->enmType)
1915 {
1916 case PDMBLKCACHETYPE_DEV:
1917 {
1918 pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns,
1919 pReq->pvUser, pReq->rcReq);
1920 break;
1921 }
1922 case PDMBLKCACHETYPE_DRV:
1923 {
1924 pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns,
1925 pReq->pvUser, pReq->rcReq);
1926 break;
1927 }
1928 case PDMBLKCACHETYPE_USB:
1929 {
1930 pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns,
1931 pReq->pvUser, pReq->rcReq);
1932 break;
1933 }
1934 case PDMBLKCACHETYPE_INTERNAL:
1935 {
1936 pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser,
1937 pReq->pvUser, pReq->rcReq);
1938 break;
1939 }
1940 default:
1941 AssertMsgFailed(("Unknown block cache type!\n"));
1942 }
1943
1944 RTMemFree(pReq);
1945}
1946
1947static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
1948 int rcReq, bool fCallHandler)
1949{
1950 if (RT_FAILURE(rcReq))
1951 ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS);
1952
1953 AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n"));
1954 uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending);
1955
1956 if (!cXfersPending)
1957 {
1958 if (fCallHandler)
1959 pdmBlkCacheReqComplete(pBlkCache, pReq);
1960 return true;
1961 }
1962
1963 LogFlowFunc(("pReq=%#p cXfersPending=%u\n", pReq, cXfersPending));
1964 return false;
1965}
1966
1967VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off,
1968 PCRTSGBUF pSgBuf, size_t cbRead, void *pvUser)
1969{
1970 int rc = VINF_SUCCESS;
1971 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1972 PPDMBLKCACHEENTRY pEntry;
1973 PPDMBLKCACHEREQ pReq;
1974
1975 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbRead=%u pvUser=%#p\n",
1976 pBlkCache, pBlkCache->pszId, off, pSgBuf, cbRead, pvUser));
1977
1978 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
1979 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
1980
1981 RTSGBUF SgBuf;
1982 RTSgBufClone(&SgBuf, pSgBuf);
1983
1984 /* Allocate new request structure. */
1985 pReq = pdmBlkCacheReqAlloc(pvUser);
1986 if (RT_UNLIKELY(!pReq))
1987 return VERR_NO_MEMORY;
1988
1989 /* Increment data transfer counter to keep the request valid while we access it. */
1990 ASMAtomicIncU32(&pReq->cXfersPending);
1991
1992 while (cbRead)
1993 {
1994 size_t cbToRead;
1995
1996 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1997
1998 /*
1999 * If there is no entry we try to create a new one, evicting unused pages
2000 * if the cache is full. If this is not possible we pass the request through
2001 * and skip the caching (all entries may still be in progress so they can't
2002 * be evicted).
2003 * If we have an entry it can be in one of the LRU lists where the entry
2004 * contains data (recently used or frequently used LRU) so we can just read
2005 * the data we need and put the entry at the head of the frequently used LRU list.
2006 * In case the entry is in one of the ghost lists it doesn't contain any data.
2007 * We have to fetch it again evicting pages from either T1 or T2 to make room.
2008 */
2009 if (pEntry)
2010 {
2011 uint64_t offDiff = off - pEntry->Core.Key;
2012
2013 AssertMsg(off >= pEntry->Core.Key,
2014 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
2015 off, pEntry->Core.Key));
2016
2017 AssertPtr(pEntry->pList);
2018
2019 cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead);
2020
2021 AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
2022 ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n",
2023 off, cbToRead));
2024
2025 cbRead -= cbToRead;
2026
2027 if (!cbRead)
2028 STAM_COUNTER_INC(&pCache->cHits);
2029 else
2030 STAM_COUNTER_INC(&pCache->cPartialHits);
2031
2032 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
2033
2034 /* Ghost lists contain no data. */
2035 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2036 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2037 {
2038 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2039 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2040 PDMBLKCACHE_ENTRY_IS_DIRTY))
2041 {
2042 /* Entry hasn't completed yet. Append to the waiting list. */
2043 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2044 &SgBuf, offDiff, cbToRead,
2045 false /* fWrite */);
2046 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2047 }
2048 else
2049 {
2050 /* Read as much as we can from the entry. */
2051 RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead);
2052 }
2053
2054 /* Move this entry to the top position */
2055 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2056 {
2057 pdmBlkCacheLockEnter(pCache);
2058 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2059 pdmBlkCacheLockLeave(pCache);
2060 }
2061 /* Release the entry */
2062 pdmBlkCacheEntryRelease(pEntry);
2063 }
2064 else
2065 {
2066 uint8_t *pbBuffer = NULL;
2067
2068 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
2069
2070 pdmBlkCacheLockEnter(pCache);
2071 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2072 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2073
2074 /* Move the entry to Am and fetch it to the cache. */
2075 if (fEnough)
2076 {
2077 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2078 pdmBlkCacheAdd(pCache, pEntry->cbData);
2079 pdmBlkCacheLockLeave(pCache);
2080
2081 if (pbBuffer)
2082 pEntry->pbData = pbBuffer;
2083 else
2084 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2085 AssertPtr(pEntry->pbData);
2086
2087 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2088 &SgBuf, offDiff, cbToRead,
2089 false /* fWrite */);
2090 pdmBlkCacheEntryReadFromMedium(pEntry);
2091 /* Release the entry */
2092 pdmBlkCacheEntryRelease(pEntry);
2093 }
2094 else
2095 {
2096 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2097 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2098 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2099 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2100 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2101
2102 pdmBlkCacheLockLeave(pCache);
2103
2104 RTMemFree(pEntry);
2105
2106 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2107 &SgBuf, off, cbToRead,
2108 PDMBLKCACHEXFERDIR_READ);
2109 }
2110 }
2111 }
2112 else
2113 {
2114#ifdef VBOX_WITH_IO_READ_CACHE
2115 /* No entry found for this offset. Create a new entry and fetch the data to the cache. */
2116 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2117 off, cbRead,
2118 &cbToRead);
2119
2120 cbRead -= cbToRead;
2121
2122 if (pEntryNew)
2123 {
2124 if (!cbRead)
2125 STAM_COUNTER_INC(&pCache->cMisses);
2126 else
2127 STAM_COUNTER_INC(&pCache->cPartialHits);
2128
2129 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2130 &SgBuf,
2131 off - pEntryNew->Core.Key,
2132 cbToRead,
2133 false /* fWrite */);
2134 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2135 pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
2136 }
2137 else
2138 {
2139 /*
2140 * There is not enough free space in the cache.
2141 * Pass the request directly to the I/O manager.
2142 */
2143 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
2144
2145 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2146 &SgBuf, off, cbToRead,
2147 PDMBLKCACHEXFERDIR_READ);
2148 }
2149#else
2150 /* Clip read size if necessary. */
2151 PPDMBLKCACHEENTRY pEntryAbove;
2152 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
2153
2154 if (pEntryAbove)
2155 {
2156 if (off + cbRead > pEntryAbove->Core.Key)
2157 cbToRead = pEntryAbove->Core.Key - off;
2158 else
2159 cbToRead = cbRead;
2160
2161 pdmBlkCacheEntryRelease(pEntryAbove);
2162 }
2163 else
2164 cbToRead = cbRead;
2165
2166 cbRead -= cbToRead;
2167 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2168 &SgBuf, off, cbToRead,
2169 PDMBLKCACHEXFERDIR_READ);
2170#endif
2171 }
2172 off += cbToRead;
2173 }
2174
2175 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2176 rc = VINF_AIO_TASK_PENDING;
2177 else
2178 {
2179 rc = pReq->rcReq;
2180 RTMemFree(pReq);
2181 }
2182
2183 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2184
2185 return rc;
2186}
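
/*
 * Editor's sketch (illustrative only, not part of the original source): how a
 * caller owning a PDMBLKCACHE handle might issue a cached read of a single
 * flat buffer.  "pvCompletionTag" is whatever the caller wants handed back by
 * its transfer-complete callback; all names here are hypothetical.
 */
static int pdmBlkCacheExampleRead(PPDMBLKCACHE pBlkCache, uint64_t off, void *pvDst, size_t cb, void *pvCompletionTag)
{
    RTSGSEG Seg;
    Seg.pvSeg = pvDst;
    Seg.cbSeg = cb;

    RTSGBUF SgBuf;
    RTSgBufInit(&SgBuf, &Seg, 1);

    int rc = PDMR3BlkCacheRead(pBlkCache, off, &SgBuf, cb, pvCompletionTag);
    if (rc == VINF_AIO_TASK_PENDING)
        return VINF_SUCCESS; /* Parts missed the cache; the completion callback fires later. */
    return rc;               /* Fully served from the cache (or failed synchronously). */
}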
2187
2188VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off, PCRTSGBUF pSgBuf, size_t cbWrite, void *pvUser)
2189{
2190 int rc = VINF_SUCCESS;
2191 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2192 PPDMBLKCACHEENTRY pEntry;
2193 PPDMBLKCACHEREQ pReq;
2194
2195 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbWrite=%u pvUser=%#p\n",
2196 pBlkCache, pBlkCache->pszId, off, pSgBuf, cbWrite, pvUser));
2197
2198 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2199 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2200
2201 RTSGBUF SgBuf;
2202 RTSgBufClone(&SgBuf, pSgBuf);
2203
2204 /* Allocate new request structure. */
2205 pReq = pdmBlkCacheReqAlloc(pvUser);
2206 if (RT_UNLIKELY(!pReq))
2207 return VERR_NO_MEMORY;
2208
2209 /* Increment data transfer counter to keep the request valid while we access it. */
2210 ASMAtomicIncU32(&pReq->cXfersPending);
2211
2212 while (cbWrite)
2213 {
2214 size_t cbToWrite;
2215
2216 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
2217 if (pEntry)
2218 {
2219 /* Write the data into the entry and mark it as dirty */
2220 AssertPtr(pEntry->pList);
2221
2222 uint64_t offDiff = off - pEntry->Core.Key;
2223 AssertMsg(off >= pEntry->Core.Key, ("Overflow in calculation off=%llu OffsetAligned=%llu\n", off, pEntry->Core.Key));
2224
2225 cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite);
2226 cbWrite -= cbToWrite;
2227
2228 if (!cbWrite)
2229 STAM_COUNTER_INC(&pCache->cHits);
2230 else
2231 STAM_COUNTER_INC(&pCache->cPartialHits);
2232
2233 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2234
2235 /* Ghost lists contain no data. */
2236 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2237 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2238 {
2239 /* Check if the entry is dirty. */
2240 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2241 PDMBLKCACHE_ENTRY_IS_DIRTY,
2242 0))
2243 {
2244 /* If it is already dirty but not in progress just update the data. */
2245 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2246 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2247 else
2248 {
2249 /* The data isn't written to the file yet */
2250 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2251 &SgBuf, offDiff, cbToWrite,
2252 true /* fWrite */);
2253 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2254 }
2255
2256 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2257 }
2258 else /* Dirty bit not set */
2259 {
2260 /*
2261 * Check if a read is in progress for this entry.
2262 * We have to defer processing in that case.
2263 */
2264 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2265 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2266 0))
2267 {
2268 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2269 &SgBuf, offDiff, cbToWrite,
2270 true /* fWrite */);
2271 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2272 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2273 }
2274 else /* I/O in progress flag not set */
2275 {
2276 /* Write as much as we can into the entry and update the file. */
2277 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2278
2279 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2280 if (fCommit)
2281 pdmBlkCacheCommitDirtyEntries(pCache);
2282 }
2283 } /* Dirty bit not set */
2284
2285 /* Move this entry to the top position */
2286 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2287 {
2288 pdmBlkCacheLockEnter(pCache);
2289 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2290 pdmBlkCacheLockLeave(pCache);
2291 }
2292
2293 pdmBlkCacheEntryRelease(pEntry);
2294 }
2295 else /* Entry is on the ghost list */
2296 {
2297 uint8_t *pbBuffer = NULL;
2298
2299 pdmBlkCacheLockEnter(pCache);
2300 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2301 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2302
2303 if (fEnough)
2304 {
2305 /* Move the entry to Am and fetch it to the cache. */
2306 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2307 pdmBlkCacheAdd(pCache, pEntry->cbData);
2308 pdmBlkCacheLockLeave(pCache);
2309
2310 if (pbBuffer)
2311 pEntry->pbData = pbBuffer;
2312 else
2313 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2314 AssertPtr(pEntry->pbData);
2315
2316 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2317 &SgBuf, offDiff, cbToWrite,
2318 true /* fWrite */);
2319 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2320 pdmBlkCacheEntryReadFromMedium(pEntry);
2321
2322 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2323 pdmBlkCacheEntryRelease(pEntry);
2324 }
2325 else
2326 {
2327 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2328 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2329 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2330 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2331 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2332
2333 pdmBlkCacheLockLeave(pCache);
2334
2335 RTMemFree(pEntry);
2336 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2337 &SgBuf, off, cbToWrite,
2338 PDMBLKCACHEXFERDIR_WRITE);
2339 }
2340 }
2341 }
2342 else /* No entry found */
2343 {
2344 /*
2345 * No entry found. Try to create a new cache entry to store the data in and if that fails
2346 * write directly to the file.
2347 */
2348 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2349 off, cbWrite,
2350 &cbToWrite);
2351
2352 cbWrite -= cbToWrite;
2353
2354 if (pEntryNew)
2355 {
2356 uint64_t offDiff = off - pEntryNew->Core.Key;
2357
2358 STAM_COUNTER_INC(&pCache->cHits);
2359
2360 /*
2361 * Check if it is possible to just write the data without waiting
2362 * for it to get fetched first.
2363 */
2364 if (!offDiff && pEntryNew->cbData == cbToWrite)
2365 {
2366 RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite);
2367
2368 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew);
2369 if (fCommit)
2370 pdmBlkCacheCommitDirtyEntries(pCache);
2371 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2372 }
2373 else
2374 {
2375 /* Defer the write and fetch the data from the endpoint. */
2376 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2377 &SgBuf, offDiff, cbToWrite,
2378 true /* fWrite */);
2379 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2380 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2381 }
2382
2383 pdmBlkCacheEntryRelease(pEntryNew);
2384 }
2385 else
2386 {
2387 /*
2388 * There is not enough free space in the cache.
2389 * Pass the request directly to the I/O manager.
2390 */
2391 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2392
2393 STAM_COUNTER_INC(&pCache->cMisses);
2394
2395 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2396 &SgBuf, off, cbToWrite,
2397 PDMBLKCACHEXFERDIR_WRITE);
2398 }
2399 }
2400
2401 off += cbToWrite;
2402 }
2403
2404 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2405 rc = VINF_AIO_TASK_PENDING;
2406 else
2407 {
2408 rc = pReq->rcReq;
2409 RTMemFree(pReq);
2410 }
2411
2412 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2413
2414 return rc;
2415}
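
/*
 * Editor's sketch (illustrative only, not part of the original source): the
 * write-side counterpart.  The cache works write-back, so a synchronous
 * VINF_SUCCESS only means the data was accepted by the cache; the commit to
 * the medium happens later through the dirty-entry commit path.  Names are
 * hypothetical.
 */
static int pdmBlkCacheExampleWrite(PPDMBLKCACHE pBlkCache, uint64_t off, const void *pvSrc, size_t cb, void *pvCompletionTag)
{
    RTSGSEG Seg;
    Seg.pvSeg = (void *)pvSrc;
    Seg.cbSeg = cb;

    RTSGBUF SgBuf;
    RTSgBufInit(&SgBuf, &Seg, 1);

    int rc = PDMR3BlkCacheWrite(pBlkCache, off, &SgBuf, cb, pvCompletionTag);
    return rc == VINF_AIO_TASK_PENDING ? VINF_SUCCESS : rc;
}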
2416
2417VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser)
2418{
2419 int rc = VINF_SUCCESS;
2420 PPDMBLKCACHEREQ pReq;
2421
2422 LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId));
2423
2424 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2425 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2426
2427 /* Commit dirty entries in the cache. */
2428 pdmBlkCacheCommit(pBlkCache);
2429
2430 /* Allocate new request structure. */
2431 pReq = pdmBlkCacheReqAlloc(pvUser);
2432 if (RT_UNLIKELY(!pReq))
2433 return VERR_NO_MEMORY;
2434
2435 rc = pdmBlkCacheRequestPassthrough(pBlkCache, pReq, NULL, 0, 0,
2436 PDMBLKCACHEXFERDIR_FLUSH);
2437 AssertRC(rc);
2438
2439 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2440 return VINF_AIO_TASK_PENDING;
2441}
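
/*
 * Editor's sketch (illustrative only, not part of the original source): a
 * successful flush always completes asynchronously -- dirty entries are
 * committed and a flush is passed to the medium, with the result delivered
 * through the caller's transfer-complete callback.
 */
static void pdmBlkCacheExampleFlush(PPDMBLKCACHE pBlkCache, void *pvCompletionTag)
{
    int rc = PDMR3BlkCacheFlush(pBlkCache, pvCompletionTag);
    Assert(rc == VINF_AIO_TASK_PENDING || RT_FAILURE(rc));
    NOREF(rc);
}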
2442
2443VMMR3DECL(int) PDMR3BlkCacheDiscard(PPDMBLKCACHE pBlkCache, PCRTRANGE paRanges,
2444 unsigned cRanges, void *pvUser)
2445{
2446 int rc = VINF_SUCCESS;
2447 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2448 PPDMBLKCACHEENTRY pEntry;
2449 PPDMBLKCACHEREQ pReq;
2450
2451 LogFlowFunc((": pBlkCache=%#p{%s} paRanges=%#p cRanges=%u pvUser=%#p\n",
2452 pBlkCache, pBlkCache->pszId, paRanges, cRanges, pvUser));
2453
2454 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2455 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2456
2457 /* Allocate new request structure. */
2458 pReq = pdmBlkCacheReqAlloc(pvUser);
2459 if (RT_UNLIKELY(!pReq))
2460 return VERR_NO_MEMORY;
2461
2462 /* Increment data transfer counter to keep the request valid while we access it. */
2463 ASMAtomicIncU32(&pReq->cXfersPending);
2464
2465 for (unsigned i = 0; i < cRanges; i++)
2466 {
2467 uint64_t offCur = paRanges[i].offStart;
2468 size_t cbLeft = paRanges[i].cbRange;
2469
2470 while (cbLeft)
2471 {
2472 size_t cbThisDiscard = 0;
2473
2474 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, offCur);
2475
2476 if (pEntry)
2477 {
2478 /* There is a cache entry covering this offset; discard the covered bytes. */
2479 AssertPtr(pEntry->pList);
2480
2481 uint64_t offDiff = offCur - pEntry->Core.Key;
2482
2483 AssertMsg(offCur >= pEntry->Core.Key,
2484 ("Overflow in calculation offCur=%llu OffsetAligned=%llu\n",
2485 offCur, pEntry->Core.Key));
2486
2487 cbThisDiscard = RT_MIN(pEntry->cbData - offDiff, cbLeft);
2488
2489 /* Ghost lists contain no data. */
2490 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2491 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2492 {
2493 /* Check if the entry is dirty. */
2494 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2495 PDMBLKCACHE_ENTRY_IS_DIRTY,
2496 0))
2497 {
2498 /* If it is dirty but not yet in progress remove it. */
2499 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2500 {
2501 pdmBlkCacheLockEnter(pCache);
2502 pdmBlkCacheEntryRemoveFromList(pEntry);
2503
2504 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2505 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2506 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2507
2508 pdmBlkCacheLockLeave(pCache);
2509
2510 RTMemFree(pEntry);
2511 }
2512 else
2513 {
2514#if 0
2515 /* The data isn't written to the file yet */
2516 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2517 &SgBuf, offDiff, cbToWrite,
2518 true /* fWrite */);
2519 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2520#endif
2521 }
2522
2523 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2524 pdmBlkCacheEntryRelease(pEntry);
2525 }
2526 else /* Dirty bit not set */
2527 {
2528 /*
2529 * Check if a read is in progress for this entry.
2530 * We have to defer processing in that case.
2531 */
2532 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2533 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2534 0))
2535 {
2536#if 0
2537 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2538 &SgBuf, offDiff, cbToWrite,
2539 true /* fWrite */);
2540#endif
2541 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2542 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2543 pdmBlkCacheEntryRelease(pEntry);
2544 }
2545 else /* I/O in progress flag not set */
2546 {
2547 pdmBlkCacheLockEnter(pCache);
2548 pdmBlkCacheEntryRemoveFromList(pEntry);
2549
2550 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2551 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2552 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2553 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2554 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2555
2556 pdmBlkCacheLockLeave(pCache);
2557
2558 RTMemFree(pEntry);
2559 }
2560 } /* Dirty bit not set */
2561 }
2562 else /* Entry is on the ghost list, just remove the cache entry. */
2563 {
2564 pdmBlkCacheLockEnter(pCache);
2565 pdmBlkCacheEntryRemoveFromList(pEntry);
2566
2567 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2568 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2569 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2570 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2571 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2572
2573 pdmBlkCacheLockLeave(pCache);
2574
2575 RTMemFree(pEntry);
2576 }
2577 }
2578 /* else: no entry found. */
2579
2580 offCur += cbThisDiscard;
2581 cbLeft -= cbThisDiscard;
2582 }
2583 }
2584
2585 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2586 rc = VINF_AIO_TASK_PENDING;
2587 else
2588 {
2589 rc = pReq->rcReq;
2590 RTMemFree(pReq);
2591 }
2592
2593 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2594
2595 return rc;
2596}
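
/*
 * Editor's sketch (illustrative only, not part of the original source):
 * discarding two byte ranges.  Cache entries overlapping the ranges are
 * evicted (or left to an in-flight write to finish first); each RTRANGE
 * gives a byte offset and length on the medium.  Sizes are arbitrary.
 */
static int pdmBlkCacheExampleDiscard(PPDMBLKCACHE pBlkCache, void *pvCompletionTag)
{
    RTRANGE aRanges[2];
    aRanges[0].offStart = 0;
    aRanges[0].cbRange  = _64K;
    aRanges[1].offStart = _1M;
    aRanges[1].cbRange  = _128K;

    int rc = PDMR3BlkCacheDiscard(pBlkCache, &aRanges[0], RT_ELEMENTS(aRanges), pvCompletionTag);
    return rc == VINF_AIO_TASK_PENDING ? VINF_SUCCESS : rc;
}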
2597
2598/**
2599 * Completes a task segment, freeing its resources, and completes the request
2600 * once all of its transfers have finished.
2601 *
2602 * @returns Next task segment handle.
2603 * @param pBlkCache The endpoint block cache.
2604 * @param pWaiter Task segment to complete.
2605 * @param rc Status code to set.
2606 */
2607static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEWAITER pWaiter, int rc)
2608{
2609 PPDMBLKCACHEWAITER pNext = pWaiter->pNext;
2610 PPDMBLKCACHEREQ pReq = pWaiter->pReq;
2611
2612 pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, true);
2613
2614 RTMemFree(pWaiter);
2615
2616 return pNext;
2617}
2618
2619static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2620{
2621 PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry;
2622 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2623
2624 /* Reference the entry now as we are clearing the I/O in progress flag
2625 * which protected the entry till now. */
2626 pdmBlkCacheEntryRef(pEntry);
2627
2628 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2629 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
2630
2631 /* Process the waiting segment list. The data in the entry might have changed in between. */
2632 bool fDirty = false;
2633 PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead;
2634 PPDMBLKCACHEWAITER pCurr = pComplete;
2635
2636 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
2637 ("The list tail was not updated correctly\n"));
2638 pEntry->pWaitingTail = NULL;
2639 pEntry->pWaitingHead = NULL;
2640
2641 if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE)
2642 {
2643 /*
2644 * An error here is difficult to handle as the original request completed already.
2645 * The error is logged for now and the VM is paused.
2646 * If the user continues the entry is written again in the hope
2647 * the user fixed the problem and the next write succeeds.
2648 */
2649 if (RT_FAILURE(rcIoXfer))
2650 {
2651 LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\" (rc=%Rrc)\n",
2652 pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId, rcIoXfer));
2653
2654 if (!ASMAtomicXchgBool(&pCache->fIoErrorVmSuspended, true))
2655 {
2656 int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR",
2657 N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc). "
2658 "Make sure there is enough free space on the disk and that the disk is working properly. "
2659 "Operation can be resumed afterwards"),
2660 pBlkCache->pszId, rcIoXfer);
2661 AssertRC(rc);
2662 }
2663
2664 /* Mark the entry as dirty again to get it added to the list later on. */
2665 fDirty = true;
2666 }
2667
2668 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY;
2669
2670 while (pCurr)
2671 {
2672 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
2673
2674 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2675 fDirty = true;
2676 pCurr = pCurr->pNext;
2677 }
2678 }
2679 else
2680 {
2681 AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n"));
2682 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY),
2683 ("Invalid flags set\n"));
2684
2685 while (pCurr)
2686 {
2687 if (pCurr->fWrite)
2688 {
2689 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2690 fDirty = true;
2691 }
2692 else
2693 RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2694
2695 pCurr = pCurr->pNext;
2696 }
2697 }
2698
2699 bool fCommit = false;
2700 if (fDirty)
2701 fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2702
2703 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2704
2705 /* Dereference so that it isn't protected anymore, unless another write was issued for it. */
2706 pdmBlkCacheEntryRelease(pEntry);
2707
2708 if (fCommit)
2709 pdmBlkCacheCommitDirtyEntries(pCache);
2710
2711 /* Complete waiters now. */
2712 while (pComplete)
2713 pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer);
2714}
2715
2716VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2717{
2718 LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer));
2719
2720 if (hIoXfer->fIoCache)
2721 pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer);
2722 else
2723 pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, rcIoXfer, true);
2724
2725 ASMAtomicDecU32(&pBlkCache->cIoXfersActive);
2726 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: I/O req %#p (%RTbool) completed (%u now active)",
2727 hIoXfer, hIoXfer->fIoCache, pBlkCache->cIoXfersActive);
2728 RTMemFree(hIoXfer);
2729}
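
/*
 * Editor's sketch (illustrative only, not part of the original source): the
 * party that was handed an I/O transfer by the cache (through its enqueue
 * callback) reports completion like this once the medium access has finished.
 * The cache then updates the entry or the originating request and frees the
 * transfer handle; how the caller stores hIoXfer is hypothetical.
 */
static void pdmBlkCacheExampleXferDone(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcMediumAccess)
{
    PDMR3BlkCacheIoXferComplete(pBlkCache, hIoXfer, rcMediumAccess);
}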
2730
2731/**
2732 * Callback for the AVL do-with-all routine. Waits for a cache entry to finish any pending I/O.
2733 *
2734 * @returns IPRT status code.
2735 * @param pNode The node (cache entry) to wait on.
2736 * @param pvUser Opaque user data.
2737 */
2738static DECLCALLBACK(int) pdmBlkCacheEntryQuiesce(PAVLRU64NODECORE pNode, void *pvUser)
2739{
2740 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
2741 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
2742 NOREF(pvUser);
2743
2744 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
2745 {
2746 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
2747 pdmBlkCacheEntryRef(pEntry);
2748 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2749
2750 RTThreadSleep(1);
2751
2752 /* Re-enter all locks and drop the reference. */
2753 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2754 pdmBlkCacheEntryRelease(pEntry);
2755 }
2756
2757 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
2758 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
2759
2760 return VINF_SUCCESS;
2761}
2762
2763VMMR3DECL(int) PDMR3BlkCacheSuspend(PPDMBLKCACHE pBlkCache)
2764{
2765 int rc = VINF_SUCCESS;
2766 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2767
2768 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2769
2770 if (!ASMAtomicReadBool(&pBlkCache->pCache->fIoErrorVmSuspended))
2771 pdmBlkCacheCommit(pBlkCache); /* Can issue new I/O requests. */
2772 ASMAtomicXchgBool(&pBlkCache->fSuspended, true);
2773
2774 /* Wait for all I/O to complete. */
2775 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2776 rc = RTAvlrU64DoWithAll(pBlkCache->pTree, true, pdmBlkCacheEntryQuiesce, NULL);
2777 AssertRC(rc);
2778 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2779
2780 return rc;
2781}
2782
2783VMMR3DECL(int) PDMR3BlkCacheResume(PPDMBLKCACHE pBlkCache)
2784{
2785 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2786
2787 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2788
2789 ASMAtomicXchgBool(&pBlkCache->fSuspended, false);
2790
2791 return VINF_SUCCESS;
2792}
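
/*
 * Editor's sketch (illustrative only, not part of the original source): a
 * device's power-management path could bracket a save/migrate operation like
 * this.  Suspend commits dirty data (unless an earlier I/O error suspended the
 * VM) and waits for in-flight cache I/O; Resume merely clears the suspended
 * flag so new requests are accepted again.
 */
static void pdmBlkCacheExampleSuspendResume(PPDMBLKCACHE pBlkCache)
{
    int rc = PDMR3BlkCacheSuspend(pBlkCache);
    AssertRC(rc);

    /* ... save or migrate device state here ... */

    rc = PDMR3BlkCacheResume(pBlkCache);
    AssertRC(rc);
}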
2793
2794VMMR3DECL(int) PDMR3BlkCacheClear(PPDMBLKCACHE pBlkCache)
2795{
2796 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2797
2798 /*
2799 * Commit all dirty entries now (they are waited on for completion during the
2800 * destruction of the AVL tree below).
2801 * The exception is if the VM was paused because of an I/O error before.
2802 */
2803 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
2804 pdmBlkCacheCommit(pBlkCache);
2805
2806 /* Make sure nobody is accessing the cache while we delete the tree. */
2807 pdmBlkCacheLockEnter(pCache);
2808 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2809 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
2810 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2811
2812 pdmBlkCacheLockLeave(pCache);
2813 return VINF_SUCCESS;
2814}
2815