VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMBlkCache.cpp@ 74795

Last change on this file since 74795 was 72054, checked in by vboxsync, 6 years ago

*: RTListMove no longer needs the call to init the destination list to work around buggy behavior when the source is empty.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 99.8 KB
Line 
1/* $Id: PDMBlkCache.cpp 72054 2018-04-27 09:18:51Z vboxsync $ */
2/** @file
3 * PDM Block Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache
19 * This component implements an I/O cache based on the 2Q cache algorithm.
20 */
21
22
23/*********************************************************************************************************************************
24* Header Files *
25*********************************************************************************************************************************/
26#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE
27#include "PDMInternal.h"
28#include <iprt/asm.h>
29#include <iprt/mem.h>
30#include <iprt/path.h>
31#include <iprt/string.h>
32#include <iprt/trace.h>
33#include <VBox/log.h>
34#include <VBox/vmm/stam.h>
35#include <VBox/vmm/uvm.h>
36#include <VBox/vmm/vm.h>
37
38#include "PDMBlkCacheInternal.h"
39
#ifdef VBOX_STRICT
/** Asserts that the calling thread owns the critical section of the given cache. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
                  ("Thread does not own critical section\n"));\
    } while (0)

/** Asserts that the calling thread is the write owner of the per endpoint RW semaphore. */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while (0)

/** Asserts that the calling thread is a read owner of the per endpoint RW semaphore. */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while (0)

#else
/* The ownership checks expand to nothing when VBOX_STRICT is not defined. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while (0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while (0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while (0)
#endif
67
68#define PDM_BLK_CACHE_SAVED_STATE_VERSION 1
69
/* Uncomment the define below to enable some tracing in the block cache code for investigating issues. */
71/*#define VBOX_BLKCACHE_TRACING 1*/
72
73
74/*********************************************************************************************************************************
75* Internal Functions *
76*********************************************************************************************************************************/
77
78static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
79 uint64_t off, size_t cbData, uint8_t *pbBuffer);
80static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry);
81
82
83/**
84 * Add message to the VM trace buffer.
85 *
86 * @returns nothing.
87 * @param pBlkCache The block cache.
88 * @param pszFmt The format string.
89 * @param ... Additional parameters for the string formatter.
90 */
91DECLINLINE(void) pdmBlkCacheR3TraceMsgF(PPDMBLKCACHE pBlkCache, const char *pszFmt, ...)
92{
93#if defined(VBOX_BLKCACHE_TRACING)
94 va_list va;
95 va_start(va, pszFmt);
96 RTTraceBufAddMsgV(pBlkCache->pCache->pVM->CTX_SUFF(hTraceBuf), pszFmt, va);
97 va_end(va);
98#else
99 RT_NOREF2(pBlkCache, pszFmt);
100#endif
101}
102
103/**
104 * Decrement the reference counter of the given cache entry.
105 *
106 * @returns nothing.
107 * @param pEntry The entry to release.
108 */
109DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry)
110{
111 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
112 ASMAtomicDecU32(&pEntry->cRefs);
113}
114
115/**
116 * Increment the reference counter of the given cache entry.
117 *
118 * @returns nothing.
119 * @param pEntry The entry to reference.
120 */
121DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry)
122{
123 ASMAtomicIncU32(&pEntry->cRefs);
124}
125
126#ifdef VBOX_STRICT
127static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache)
128{
129 /* Amount of cached data should never exceed the maximum amount. */
130 AssertMsg(pCache->cbCached <= pCache->cbMax,
131 ("Current amount of cached data exceeds maximum\n"));
132
133 /* The amount of cached data in the LRU and FRU list should match cbCached */
134 AssertMsg(pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached == pCache->cbCached,
135 ("Amount of cached data doesn't match\n"));
136
137 AssertMsg(pCache->LruRecentlyUsedOut.cbCached <= pCache->cbRecentlyUsedOutMax,
138 ("Paged out list exceeds maximum\n"));
139}
140#endif
141
142DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache)
143{
144 RTCritSectEnter(&pCache->CritSect);
145#ifdef VBOX_STRICT
146 pdmBlkCacheValidate(pCache);
147#endif
148}
149
150DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache)
151{
152#ifdef VBOX_STRICT
153 pdmBlkCacheValidate(pCache);
154#endif
155 RTCritSectLeave(&pCache->CritSect);
156}
157
158DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
159{
160 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
161 pCache->cbCached -= cbAmount;
162}
163
164DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
165{
166 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
167 pCache->cbCached += cbAmount;
168}
169
170DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount)
171{
172 pList->cbCached += cbAmount;
173}
174
175DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount)
176{
177 pList->cbCached -= cbAmount;
178}
179
180#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
181/**
182 * Checks consistency of a LRU list.
183 *
184 * @returns nothing
185 * @param pList The LRU list to check.
186 * @param pNotInList Element which is not allowed to occur in the list.
187 */
188static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList)
189{
190 PPDMBLKCACHEENTRY pCurr = pList->pHead;
191
192 /* Check that there are no double entries and no cycles in the list. */
193 while (pCurr)
194 {
195 PPDMBLKCACHEENTRY pNext = pCurr->pNext;
196
197 while (pNext)
198 {
199 AssertMsg(pCurr != pNext,
200 ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
201 pCurr, pList));
202 pNext = pNext->pNext;
203 }
204
205 AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr));
206
207 if (!pCurr->pNext)
208 AssertMsg(pCurr == pList->pTail, ("End of list reached but last element is not list tail\n"));
209
210 pCurr = pCurr->pNext;
211 }
212}
213#endif
214
215/**
216 * Unlinks a cache entry from the LRU list it is assigned to.
217 *
218 * @returns nothing.
219 * @param pEntry The entry to unlink.
220 */
221static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry)
222{
223 PPDMBLKLRULIST pList = pEntry->pList;
224 PPDMBLKCACHEENTRY pPrev, pNext;
225
226 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
227
228 AssertPtr(pList);
229
230#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
231 pdmBlkCacheCheckList(pList, NULL);
232#endif
233
234 pPrev = pEntry->pPrev;
235 pNext = pEntry->pNext;
236
237 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
238 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
239
240 if (pPrev)
241 pPrev->pNext = pNext;
242 else
243 {
244 pList->pHead = pNext;
245
246 if (pNext)
247 pNext->pPrev = NULL;
248 }
249
250 if (pNext)
251 pNext->pPrev = pPrev;
252 else
253 {
254 pList->pTail = pPrev;
255
256 if (pPrev)
257 pPrev->pNext = NULL;
258 }
259
260 pEntry->pList = NULL;
261 pEntry->pPrev = NULL;
262 pEntry->pNext = NULL;
263 pdmBlkCacheListSub(pList, pEntry->cbData);
264#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
265 pdmBlkCacheCheckList(pList, pEntry);
266#endif
267}
268
269/**
270 * Adds a cache entry to the given LRU list unlinking it from the currently
271 * assigned list if needed.
272 *
273 * @returns nothing.
274 * @param pList List to the add entry to.
275 * @param pEntry Entry to add.
276 */
277static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry)
278{
279 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
280#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
281 pdmBlkCacheCheckList(pList, NULL);
282#endif
283
284 /* Remove from old list if needed */
285 if (pEntry->pList)
286 pdmBlkCacheEntryRemoveFromList(pEntry);
287
288 pEntry->pNext = pList->pHead;
289 if (pList->pHead)
290 pList->pHead->pPrev = pEntry;
291 else
292 {
293 Assert(!pList->pTail);
294 pList->pTail = pEntry;
295 }
296
297 pEntry->pPrev = NULL;
298 pList->pHead = pEntry;
299 pdmBlkCacheListAdd(pList, pEntry->cbData);
300 pEntry->pList = pList;
301#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
302 pdmBlkCacheCheckList(pList, NULL);
303#endif
304}
305
306/**
307 * Destroys a LRU list freeing all entries.
308 *
309 * @returns nothing
310 * @param pList Pointer to the LRU list to destroy.
311 *
312 * @note The caller must own the critical section of the cache.
313 */
314static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList)
315{
316 while (pList->pHead)
317 {
318 PPDMBLKCACHEENTRY pEntry = pList->pHead;
319
320 pList->pHead = pEntry->pNext;
321
322 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
323 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
324
325 RTMemPageFree(pEntry->pbData, pEntry->cbData);
326 RTMemFree(pEntry);
327 }
328}
329
330/**
331 * Tries to remove the given amount of bytes from a given list in the cache
332 * moving the entries to one of the given ghosts lists
333 *
334 * @returns Amount of data which could be freed.
335 * @param pCache Pointer to the global cache data.
336 * @param cbData The amount of the data to free.
337 * @param pListSrc The source list to evict data from.
338 * @param pGhostListDst Where the ghost list removed entries should be
339 * moved to, NULL if the entry should be freed.
340 * @param fReuseBuffer Flag whether a buffer should be reused if it has
341 * the same size
342 * @param ppbBuffer Where to store the address of the buffer if an
343 * entry with the same size was found and
344 * fReuseBuffer is true.
345 *
346 * @note This function may return fewer bytes than requested because entries
347 * may be marked as non evictable if they are used for I/O at the
348 * moment.
349 */
350static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData,
351 PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst,
352 bool fReuseBuffer, uint8_t **ppbBuffer)
353{
354 size_t cbEvicted = 0;
355
356 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
357
358 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
359 AssertMsg( !pGhostListDst
360 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
361 ("Destination list must be NULL or the recently used but paged out list\n"));
362
363 if (fReuseBuffer)
364 {
365 AssertPtr(ppbBuffer);
366 *ppbBuffer = NULL;
367 }
368
369 /* Start deleting from the tail. */
370 PPDMBLKCACHEENTRY pEntry = pListSrc->pTail;
371
372 while ((cbEvicted < cbData) && pEntry)
373 {
374 PPDMBLKCACHEENTRY pCurr = pEntry;
375
376 pEntry = pEntry->pPrev;
377
378 /* We can't evict pages which are currently in progress or dirty but not in progress */
379 if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
380 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
381 {
382 /* Ok eviction candidate. Grab the endpoint semaphore and check again
383 * because somebody else might have raced us. */
384 PPDMBLKCACHE pBlkCache = pCurr->pBlkCache;
385 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
386
387 if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
388 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
389 {
390 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
391
392 if (fReuseBuffer && pCurr->cbData == cbData)
393 {
394 STAM_COUNTER_INC(&pCache->StatBuffersReused);
395 *ppbBuffer = pCurr->pbData;
396 }
397 else if (pCurr->pbData)
398 RTMemPageFree(pCurr->pbData, pCurr->cbData);
399
400 pCurr->pbData = NULL;
401 cbEvicted += pCurr->cbData;
402
403 pdmBlkCacheEntryRemoveFromList(pCurr);
404 pdmBlkCacheSub(pCache, pCurr->cbData);
405
406 if (pGhostListDst)
407 {
408 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
409
410 PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail;
411
412 /* We have to remove the last entries from the paged out list. */
413 while ( pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax
414 && pGhostEntFree)
415 {
416 PPDMBLKCACHEENTRY pFree = pGhostEntFree;
417 PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache;
418
419 pGhostEntFree = pGhostEntFree->pPrev;
420
421 RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
422
423 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
424 {
425 pdmBlkCacheEntryRemoveFromList(pFree);
426
427 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
428 RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key);
429 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
430
431 RTMemFree(pFree);
432 }
433
434 RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries);
435 }
436
437 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
438 {
439 /* Couldn't remove enough entries. Delete */
440 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
441 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
442 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
443
444 RTMemFree(pCurr);
445 }
446 else
447 pdmBlkCacheEntryAddToList(pGhostListDst, pCurr);
448 }
449 else
450 {
451 /* Delete the entry from the AVL tree it is assigned to. */
452 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
453 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
454 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
455
456 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
457 RTMemFree(pCurr);
458 }
459 }
460
461 }
462 else
463 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
464 }
465
466 return cbEvicted;
467}
468
469static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
470{
471 size_t cbRemoved = 0;
472
473 if ((pCache->cbCached + cbData) < pCache->cbMax)
474 return true;
475 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
476 {
477 /* Try to evict as many bytes as possible from A1in */
478 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
479 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
480
481 /*
482 * If it was not possible to remove enough entries
483 * try the frequently accessed cache.
484 */
485 if (cbRemoved < cbData)
486 {
487 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
488
489 /*
490 * If we removed something we can't pass the reuse buffer flag anymore because
491 * we don't need to evict that much data
492 */
493 if (!cbRemoved)
494 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
495 NULL, fReuseBuffer, ppbBuffer);
496 else
497 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
498 NULL, false, NULL);
499 }
500 }
501 else
502 {
503 /* We have to remove entries from frequently access list. */
504 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
505 NULL, fReuseBuffer, ppbBuffer);
506 }
507
508 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
509 return (cbRemoved >= cbData);
510}
511
512DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbXfer, PPDMBLKCACHEIOXFER pIoXfer)
513{
514 int rc = VINF_SUCCESS;
515
516 LogFlowFunc(("%s: Enqueuing hIoXfer=%#p enmXferDir=%d\n",
517 __FUNCTION__, pIoXfer, pIoXfer->enmXferDir));
518
519 ASMAtomicIncU32(&pBlkCache->cIoXfersActive);
520 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: I/O req %#p (%RTbool , %d) queued (%u now active)",
521 pIoXfer, pIoXfer->fIoCache, pIoXfer->enmXferDir, pBlkCache->cIoXfersActive);
522
523 switch (pBlkCache->enmType)
524 {
525 case PDMBLKCACHETYPE_DEV:
526 {
527 rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns,
528 pIoXfer->enmXferDir,
529 off, cbXfer,
530 &pIoXfer->SgBuf, pIoXfer);
531 break;
532 }
533 case PDMBLKCACHETYPE_DRV:
534 {
535 rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns,
536 pIoXfer->enmXferDir,
537 off, cbXfer,
538 &pIoXfer->SgBuf, pIoXfer);
539 break;
540 }
541 case PDMBLKCACHETYPE_USB:
542 {
543 rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns,
544 pIoXfer->enmXferDir,
545 off, cbXfer,
546 &pIoXfer->SgBuf, pIoXfer);
547 break;
548 }
549 case PDMBLKCACHETYPE_INTERNAL:
550 {
551 rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser,
552 pIoXfer->enmXferDir,
553 off, cbXfer,
554 &pIoXfer->SgBuf, pIoXfer);
555 break;
556 }
557 default:
558 AssertMsgFailed(("Unknown block cache type!\n"));
559 }
560
561 if (RT_FAILURE(rc))
562 {
563 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: Queueing I/O req %#p failed %Rrc", pIoXfer, rc);
564 ASMAtomicDecU32(&pBlkCache->cIoXfersActive);
565 }
566
567 LogFlowFunc(("%s: returns rc=%Rrc\n", __FUNCTION__, rc));
568 return rc;
569}
570
571/**
572 * Initiates a read I/O task for the given entry.
573 *
574 * @returns VBox status code.
575 * @param pEntry The entry to fetch the data to.
576 */
577static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry)
578{
579 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
580 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
581
582 /* Make sure no one evicts the entry while it is accessed. */
583 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
584
585 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
586 if (RT_UNLIKELY(!pIoXfer))
587 return VERR_NO_MEMORY;
588
589 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
590
591 pIoXfer->fIoCache = true;
592 pIoXfer->pEntry = pEntry;
593 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
594 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
595 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ;
596 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
597
598 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
599}
600
601/**
602 * Initiates a write I/O task for the given entry.
603 *
604 * @returns nothing.
605 * @param pEntry The entry to read the data from.
606 */
607static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry)
608{
609 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
610 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
611
612 /* Make sure no one evicts the entry while it is accessed. */
613 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
614
615 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
616 if (RT_UNLIKELY(!pIoXfer))
617 return VERR_NO_MEMORY;
618
619 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
620
621 pIoXfer->fIoCache = true;
622 pIoXfer->pEntry = pEntry;
623 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
624 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
625 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE;
626 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
627
628 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
629}
630
631/**
632 * Passthrough a part of a request directly to the I/O manager handling the
633 * endpoint.
634 *
635 * @returns VBox status code.
636 * @param pBlkCache The endpoint cache.
637 * @param pReq The request.
638 * @param pSgBuf The scatter/gather buffer.
639 * @param offStart Offset to start transfer from.
640 * @param cbData Amount of data to transfer.
641 * @param enmXferDir The transfer type (read/write)
642 */
643static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
644 PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData,
645 PDMBLKCACHEXFERDIR enmXferDir)
646{
647
648 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
649 if (RT_UNLIKELY(!pIoXfer))
650 return VERR_NO_MEMORY;
651
652 ASMAtomicIncU32(&pReq->cXfersPending);
653 pIoXfer->fIoCache = false;
654 pIoXfer->pReq = pReq;
655 pIoXfer->enmXferDir = enmXferDir;
656 if (pSgBuf)
657 {
658 RTSgBufClone(&pIoXfer->SgBuf, pSgBuf);
659 RTSgBufAdvance(pSgBuf, cbData);
660 }
661
662 return pdmBlkCacheEnqueue(pBlkCache, offStart, cbData, pIoXfer);
663}
664
665/**
666 * Commit a single dirty entry to the endpoint
667 *
668 * @returns nothing
669 * @param pEntry The entry to commit.
670 */
671static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry)
672{
673 AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)
674 && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
675 ("Invalid flags set for entry %#p\n", pEntry));
676
677 pdmBlkCacheEntryWriteToMedium(pEntry);
678}
679
680/**
681 * Commit all dirty entries for a single endpoint.
682 *
683 * @returns nothing.
684 * @param pBlkCache The endpoint cache to commit.
685 */
686static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache)
687{
688 uint32_t cbCommitted = 0;
689
690 /* Return if the cache was suspended. */
691 if (pBlkCache->fSuspended)
692 return;
693
694 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
695
696 /* The list is moved to a new header to reduce locking overhead. */
697 RTLISTANCHOR ListDirtyNotCommitted;
698
699 RTSpinlockAcquire(pBlkCache->LockList);
700 RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted);
701 RTSpinlockRelease(pBlkCache->LockList);
702
703 if (!RTListIsEmpty(&ListDirtyNotCommitted))
704 {
705 PPDMBLKCACHEENTRY pEntry = RTListGetFirst(&ListDirtyNotCommitted, PDMBLKCACHEENTRY, NodeNotCommitted);
706
707 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
708 {
709 PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY,
710 NodeNotCommitted);
711 pdmBlkCacheEntryCommit(pEntry);
712 cbCommitted += pEntry->cbData;
713 RTListNodeRemove(&pEntry->NodeNotCommitted);
714 pEntry = pNext;
715 }
716
717 /* Commit the last endpoint */
718 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
719 pdmBlkCacheEntryCommit(pEntry);
720 cbCommitted += pEntry->cbData;
721 RTListNodeRemove(&pEntry->NodeNotCommitted);
722 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
723 ("Committed all entries but list is not empty\n"));
724 }
725
726 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
727 AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted,
728 ("Number of committed bytes exceeds number of dirty bytes\n"));
729 uint32_t cbDirtyOld = ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted);
730
731 /* Reset the commit timer if we don't have any dirty bits. */
732 if ( !(cbDirtyOld - cbCommitted)
733 && pBlkCache->pCache->u32CommitTimeoutMs != 0)
734 TMTimerStop(pBlkCache->pCache->pTimerCommit);
735}
736
737/**
738 * Commit all dirty entries in the cache.
739 *
740 * @returns nothing.
741 * @param pCache The global cache instance.
742 */
743static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache)
744{
745 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
746
747 if (!fCommitInProgress)
748 {
749 pdmBlkCacheLockEnter(pCache);
750 Assert(!RTListIsEmpty(&pCache->ListUsers));
751
752 PPDMBLKCACHE pBlkCache = RTListGetFirst(&pCache->ListUsers, PDMBLKCACHE, NodeCacheUser);
753 AssertPtr(pBlkCache);
754
755 while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser))
756 {
757 pdmBlkCacheCommit(pBlkCache);
758
759 pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE,
760 NodeCacheUser);
761 }
762
763 /* Commit the last endpoint */
764 Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser));
765 pdmBlkCacheCommit(pBlkCache);
766
767 pdmBlkCacheLockLeave(pCache);
768 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
769 }
770}
771
772/**
773 * Adds the given entry as a dirty to the cache.
774 *
775 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
776 * @param pBlkCache The endpoint cache the entry belongs to.
777 * @param pEntry The entry to add.
778 */
779static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
780{
781 bool fDirtyBytesExceeded = false;
782 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
783
784 /* If the commit timer is disabled we commit right away. */
785 if (pCache->u32CommitTimeoutMs == 0)
786 {
787 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
788 pdmBlkCacheEntryCommit(pEntry);
789 }
790 else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY))
791 {
792 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
793
794 RTSpinlockAcquire(pBlkCache->LockList);
795 RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
796 RTSpinlockRelease(pBlkCache->LockList);
797
798 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
799
800 /* Prevent committing if the VM was suspended. */
801 if (RT_LIKELY(!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended)))
802 fDirtyBytesExceeded = (cbDirty + pEntry->cbData >= pCache->cbCommitDirtyThreshold);
803 else if (!cbDirty && pCache->u32CommitTimeoutMs > 0)
804 {
805 /* Arm the commit timer. */
806 TMTimerSetMillies(pCache->pTimerCommit, pCache->u32CommitTimeoutMs);
807 }
808 }
809
810 return fDirtyBytesExceeded;
811}
812
813static PPDMBLKCACHE pdmR3BlkCacheFindById(PPDMBLKCACHEGLOBAL pBlkCacheGlobal, const char *pcszId)
814{
815 bool fFound = false;
816
817 PPDMBLKCACHE pBlkCache;
818 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
819 {
820 if (!RTStrCmp(pBlkCache->pszId, pcszId))
821 {
822 fFound = true;
823 break;
824 }
825 }
826
827 return fFound ? pBlkCache : NULL;
828}
829
830/**
831 * Commit timer callback.
832 */
833static DECLCALLBACK(void) pdmBlkCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser)
834{
835 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
836 NOREF(pVM); NOREF(pTimer);
837
838 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
839
840 if ( ASMAtomicReadU32(&pCache->cbDirty) > 0
841 && !ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
842 pdmBlkCacheCommitDirtyEntries(pCache);
843
844 LogFlowFunc(("Entries committed, going to sleep\n"));
845}
846
847static DECLCALLBACK(int) pdmR3BlkCacheSaveExec(PVM pVM, PSSMHANDLE pSSM)
848{
849 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
850
851 AssertPtr(pBlkCacheGlobal);
852
853 pdmBlkCacheLockEnter(pBlkCacheGlobal);
854
855 SSMR3PutU32(pSSM, pBlkCacheGlobal->cRefs);
856
857 /* Go through the list and save all dirty entries. */
858 PPDMBLKCACHE pBlkCache;
859 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
860 {
861 uint32_t cEntries = 0;
862 PPDMBLKCACHEENTRY pEntry;
863
864 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
865 SSMR3PutU32(pSSM, (uint32_t)strlen(pBlkCache->pszId));
866 SSMR3PutStrZ(pSSM, pBlkCache->pszId);
867
868 /* Count the number of entries to safe. */
869 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
870 {
871 cEntries++;
872 }
873
874 SSMR3PutU32(pSSM, cEntries);
875
876 /* Walk the list of all dirty entries and save them. */
877 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
878 {
879 /* A few sanity checks. */
880 AssertMsg(!pEntry->cRefs, ("The entry is still referenced\n"));
881 AssertMsg(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY, ("Entry is not dirty\n"));
882 AssertMsg(!(pEntry->fFlags & ~PDMBLKCACHE_ENTRY_IS_DIRTY), ("Invalid flags set\n"));
883 AssertMsg(!pEntry->pWaitingHead && !pEntry->pWaitingTail, ("There are waiting requests\n"));
884 AssertMsg( pEntry->pList == &pBlkCacheGlobal->LruRecentlyUsedIn
885 || pEntry->pList == &pBlkCacheGlobal->LruFrequentlyUsed,
886 ("Invalid list\n"));
887 AssertMsg(pEntry->cbData == pEntry->Core.KeyLast - pEntry->Core.Key + 1,
888 ("Size and range do not match\n"));
889
890 /* Save */
891 SSMR3PutU64(pSSM, pEntry->Core.Key);
892 SSMR3PutU32(pSSM, pEntry->cbData);
893 SSMR3PutMem(pSSM, pEntry->pbData, pEntry->cbData);
894 }
895
896 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
897 }
898
899 pdmBlkCacheLockLeave(pBlkCacheGlobal);
900
901 /* Terminator */
902 return SSMR3PutU32(pSSM, UINT32_MAX);
903}
904
905static DECLCALLBACK(int) pdmR3BlkCacheLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
906{
907 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
908 uint32_t cRefs;
909
910 NOREF(uPass);
911 AssertPtr(pBlkCacheGlobal);
912
913 pdmBlkCacheLockEnter(pBlkCacheGlobal);
914
915 if (uVersion != PDM_BLK_CACHE_SAVED_STATE_VERSION)
916 return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION;
917
918 SSMR3GetU32(pSSM, &cRefs);
919
920 /*
921 * Fewer users in the saved state than in the current VM are allowed
922 * because that means that there are only new ones which don't have any saved state
923 * which can get lost.
924 * More saved state entries than registered cache users are only allowed if the
925 * missing users don't have any data saved in the cache.
926 */
927 int rc = VINF_SUCCESS;
928 char *pszId = NULL;
929
930 while ( cRefs > 0
931 && RT_SUCCESS(rc))
932 {
933 PPDMBLKCACHE pBlkCache = NULL;
934 uint32_t cbId = 0;
935
936 SSMR3GetU32(pSSM, &cbId);
937 Assert(cbId > 0);
938
939 cbId++; /* Include terminator */
940 pszId = (char *)RTMemAllocZ(cbId * sizeof(char));
941 if (!pszId)
942 {
943 rc = VERR_NO_MEMORY;
944 break;
945 }
946
947 rc = SSMR3GetStrZ(pSSM, pszId, cbId);
948 AssertRC(rc);
949
950 /* Search for the block cache with the provided id. */
951 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pszId);
952
953 /* Get the entries */
954 uint32_t cEntries;
955 SSMR3GetU32(pSSM, &cEntries);
956
957 if (!pBlkCache && (cEntries > 0))
958 {
959 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
960 N_("The VM is missing a block device and there is data in the cache. Please make sure the source and target VMs have compatible storage configurations"));
961 break;
962 }
963
964 RTMemFree(pszId);
965 pszId = NULL;
966
967 while (cEntries > 0)
968 {
969 PPDMBLKCACHEENTRY pEntry;
970 uint64_t off;
971 uint32_t cbEntry;
972
973 SSMR3GetU64(pSSM, &off);
974 SSMR3GetU32(pSSM, &cbEntry);
975
976 pEntry = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, NULL);
977 if (!pEntry)
978 {
979 rc = VERR_NO_MEMORY;
980 break;
981 }
982
983 rc = SSMR3GetMem(pSSM, pEntry->pbData, cbEntry);
984 if (RT_FAILURE(rc))
985 {
986 RTMemFree(pEntry->pbData);
987 RTMemFree(pEntry);
988 break;
989 }
990
991 /* Insert into the tree. */
992 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
993 Assert(fInserted); NOREF(fInserted);
994
995 /* Add to the dirty list. */
996 pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
997 pdmBlkCacheEntryAddToList(&pBlkCacheGlobal->LruRecentlyUsedIn, pEntry);
998 pdmBlkCacheAdd(pBlkCacheGlobal, cbEntry);
999 pdmBlkCacheEntryRelease(pEntry);
1000 cEntries--;
1001 }
1002
1003 cRefs--;
1004 }
1005
1006 if (pszId)
1007 RTMemFree(pszId);
1008
1009 if (cRefs && RT_SUCCESS(rc))
1010 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
1011 N_("Unexpected error while restoring state. Please make sure the source and target VMs have compatible storage configurations"));
1012
1013 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1014
1015 if (RT_SUCCESS(rc))
1016 {
1017 uint32_t u32 = 0;
1018 rc = SSMR3GetU32(pSSM, &u32);
1019 if (RT_SUCCESS(rc))
1020 AssertMsgReturn(u32 == UINT32_MAX, ("%#x\n", u32), VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
1021 }
1022
1023 return rc;
1024}
1025
1026int pdmR3BlkCacheInit(PVM pVM)
1027{
1028 int rc = VINF_SUCCESS;
1029 PUVM pUVM = pVM->pUVM;
1030 PPDMBLKCACHEGLOBAL pBlkCacheGlobal;
1031
1032 LogFlowFunc((": pVM=%p\n", pVM));
1033
1034 VM_ASSERT_EMT(pVM);
1035
1036 PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
1037 PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache");
1038
1039 pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL));
1040 if (!pBlkCacheGlobal)
1041 return VERR_NO_MEMORY;
1042
1043 RTListInit(&pBlkCacheGlobal->ListUsers);
1044 pBlkCacheGlobal->pVM = pVM;
1045 pBlkCacheGlobal->cRefs = 0;
1046 pBlkCacheGlobal->cbCached = 0;
1047 pBlkCacheGlobal->fCommitInProgress = false;
1048
1049 /* Initialize members */
1050 pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL;
1051 pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL;
1052 pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0;
1053
1054 pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL;
1055 pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL;
1056 pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0;
1057
1058 pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL;
1059 pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL;
1060 pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0;
1061
1062 do
1063 {
1064 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M);
1065 AssertLogRelRCBreak(rc);
1066 LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax));
1067
1068 pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */
1069 pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */
1070 LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n",
1071 pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax));
1072
1073 /** @todo r=aeichner: Experiment to find optimal default values */
1074 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */);
1075 AssertLogRelRCBreak(rc);
1076 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2);
1077 AssertLogRelRCBreak(rc);
1078 } while (0);
1079
1080 if (RT_SUCCESS(rc))
1081 {
1082 STAMR3Register(pVM, &pBlkCacheGlobal->cbMax,
1083 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1084 "/PDM/BlkCache/cbMax",
1085 STAMUNIT_BYTES,
1086 "Maximum cache size");
1087 STAMR3Register(pVM, &pBlkCacheGlobal->cbCached,
1088 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1089 "/PDM/BlkCache/cbCached",
1090 STAMUNIT_BYTES,
1091 "Currently used cache");
1092 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached,
1093 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1094 "/PDM/BlkCache/cbCachedMruIn",
1095 STAMUNIT_BYTES,
1096 "Number of bytes cached in MRU list");
1097 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached,
1098 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1099 "/PDM/BlkCache/cbCachedMruOut",
1100 STAMUNIT_BYTES,
1101 "Number of bytes cached in FRU list");
1102 STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached,
1103 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1104 "/PDM/BlkCache/cbCachedFru",
1105 STAMUNIT_BYTES,
1106 "Number of bytes cached in FRU ghost list");
1107
1108#ifdef VBOX_WITH_STATISTICS
1109 STAMR3Register(pVM, &pBlkCacheGlobal->cHits,
1110 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1111 "/PDM/BlkCache/CacheHits",
1112 STAMUNIT_COUNT, "Number of hits in the cache");
1113 STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits,
1114 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1115 "/PDM/BlkCache/CachePartialHits",
1116 STAMUNIT_COUNT, "Number of partial hits in the cache");
1117 STAMR3Register(pVM, &pBlkCacheGlobal->cMisses,
1118 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1119 "/PDM/BlkCache/CacheMisses",
1120 STAMUNIT_COUNT, "Number of misses when accessing the cache");
1121 STAMR3Register(pVM, &pBlkCacheGlobal->StatRead,
1122 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1123 "/PDM/BlkCache/CacheRead",
1124 STAMUNIT_BYTES, "Number of bytes read from the cache");
1125 STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten,
1126 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1127 "/PDM/BlkCache/CacheWritten",
1128 STAMUNIT_BYTES, "Number of bytes written to the cache");
1129 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet,
1130 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1131 "/PDM/BlkCache/CacheTreeGet",
1132 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
1133 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert,
1134 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1135 "/PDM/BlkCache/CacheTreeInsert",
1136 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
1137 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove,
1138 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1139 "/PDM/BlkCache/CacheTreeRemove",
1140 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
1141 STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused,
1142 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1143 "/PDM/BlkCache/CacheBuffersReused",
1144 STAMUNIT_COUNT, "Number of times a buffer could be reused");
1145#endif
1146
1147 /* Initialize the critical section */
1148 rc = RTCritSectInit(&pBlkCacheGlobal->CritSect);
1149 }
1150
1151 if (RT_SUCCESS(rc))
1152 {
1153 /* Create the commit timer */
1154 if (pBlkCacheGlobal->u32CommitTimeoutMs > 0)
1155 rc = TMR3TimerCreateInternal(pVM, TMCLOCK_REAL,
1156 pdmBlkCacheCommitTimerCallback,
1157 pBlkCacheGlobal,
1158 "BlkCache-Commit",
1159 &pBlkCacheGlobal->pTimerCommit);
1160
1161 if (RT_SUCCESS(rc))
1162 {
1163 /* Register saved state handler. */
1164 rc = SSMR3RegisterInternal(pVM, "pdmblkcache", 0, PDM_BLK_CACHE_SAVED_STATE_VERSION, pBlkCacheGlobal->cbMax,
1165 NULL, NULL, NULL,
1166 NULL, pdmR3BlkCacheSaveExec, NULL,
1167 NULL, pdmR3BlkCacheLoadExec, NULL);
1168 if (RT_SUCCESS(rc))
1169 {
1170 LogRel(("BlkCache: Cache successfully initialized. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax));
1171 LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs));
1172 LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold));
1173 pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal;
1174 return VINF_SUCCESS;
1175 }
1176 }
1177
1178 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1179 }
1180
1181 if (pBlkCacheGlobal)
1182 RTMemFree(pBlkCacheGlobal);
1183
1184 LogFlowFunc((": returns rc=%Rrc\n", rc));
1185 return rc;
1186}
1187
1188void pdmR3BlkCacheTerm(PVM pVM)
1189{
1190 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1191
1192 if (pBlkCacheGlobal)
1193 {
1194 /* Make sure no one else uses the cache now */
1195 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1196
1197 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
1198 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn);
1199 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut);
1200 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed);
1201
1202 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1203
1204 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1205 RTMemFree(pBlkCacheGlobal);
1206 pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL;
1207 }
1208}
1209
1210int pdmR3BlkCacheResume(PVM pVM)
1211{
1212 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1213
1214 LogFlowFunc(("pVM=%#p\n", pVM));
1215
1216 if ( pBlkCacheGlobal
1217 && ASMAtomicXchgBool(&pBlkCacheGlobal->fIoErrorVmSuspended, false))
1218 {
1219 /* The VM was suspended because of an I/O error, commit all dirty entries. */
1220 pdmBlkCacheCommitDirtyEntries(pBlkCacheGlobal);
1221 }
1222
1223 return VINF_SUCCESS;
1224}
1225
1226static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId)
1227{
1228 int rc = VINF_SUCCESS;
1229 PPDMBLKCACHE pBlkCache = NULL;
1230 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1231
1232 if (!pBlkCacheGlobal)
1233 return VERR_NOT_SUPPORTED;
1234
1235 /*
1236 * Check that no other user cache has the same id first,
1237 * Unique id's are necessary in case the state is saved.
1238 */
1239 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1240
1241 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pcszId);
1242
1243 if (!pBlkCache)
1244 {
1245 pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE));
1246
1247 if (pBlkCache)
1248 pBlkCache->pszId = RTStrDup(pcszId);
1249
1250 if ( pBlkCache
1251 && pBlkCache->pszId)
1252 {
1253 pBlkCache->fSuspended = false;
1254 pBlkCache->cIoXfersActive = 0;
1255 pBlkCache->pCache = pBlkCacheGlobal;
1256 RTListInit(&pBlkCache->ListDirtyNotCommitted);
1257
1258 rc = RTSpinlockCreate(&pBlkCache->LockList, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "pdmR3BlkCacheRetain");
1259 if (RT_SUCCESS(rc))
1260 {
1261 rc = RTSemRWCreate(&pBlkCache->SemRWEntries);
1262 if (RT_SUCCESS(rc))
1263 {
1264 pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1265 if (pBlkCache->pTree)
1266 {
1267#ifdef VBOX_WITH_STATISTICS
1268 STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred,
1269 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1270 STAMUNIT_COUNT, "Number of deferred writes",
1271 "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1272#endif
1273
1274 /* Add to the list of users. */
1275 pBlkCacheGlobal->cRefs++;
1276 RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser);
1277 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1278
1279 *ppBlkCache = pBlkCache;
1280 LogFlowFunc(("returns success\n"));
1281 return VINF_SUCCESS;
1282 }
1283
1284 rc = VERR_NO_MEMORY;
1285 RTSemRWDestroy(pBlkCache->SemRWEntries);
1286 }
1287
1288 RTSpinlockDestroy(pBlkCache->LockList);
1289 }
1290
1291 RTStrFree(pBlkCache->pszId);
1292 }
1293 else
1294 rc = VERR_NO_MEMORY;
1295
1296 if (pBlkCache)
1297 RTMemFree(pBlkCache);
1298 }
1299 else
1300 rc = VERR_ALREADY_EXISTS;
1301
1302 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1303
1304 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1305 return rc;
1306}
1307
1308VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache,
1309 PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete,
1310 PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue,
1311 PFNPDMBLKCACHEXFERENQUEUEDISCARDDRV pfnXferEnqueueDiscard,
1312 const char *pcszId)
1313{
1314 int rc = VINF_SUCCESS;
1315 PPDMBLKCACHE pBlkCache;
1316
1317 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1318 if (RT_SUCCESS(rc))
1319 {
1320 pBlkCache->enmType = PDMBLKCACHETYPE_DRV;
1321 pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete;
1322 pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue;
1323 pBlkCache->u.Drv.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1324 pBlkCache->u.Drv.pDrvIns = pDrvIns;
1325 *ppBlkCache = pBlkCache;
1326 }
1327
1328 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1329 return rc;
1330}
1331
1332VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache,
1333 PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete,
1334 PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue,
1335 PFNPDMBLKCACHEXFERENQUEUEDISCARDDEV pfnXferEnqueueDiscard,
1336 const char *pcszId)
1337{
1338 int rc = VINF_SUCCESS;
1339 PPDMBLKCACHE pBlkCache;
1340
1341 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1342 if (RT_SUCCESS(rc))
1343 {
1344 pBlkCache->enmType = PDMBLKCACHETYPE_DEV;
1345 pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete;
1346 pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue;
1347 pBlkCache->u.Dev.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1348 pBlkCache->u.Dev.pDevIns = pDevIns;
1349 *ppBlkCache = pBlkCache;
1350 }
1351
1352 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1353 return rc;
1354
1355}
1356
1357VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache,
1358 PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete,
1359 PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue,
1360 PFNPDMBLKCACHEXFERENQUEUEDISCARDUSB pfnXferEnqueueDiscard,
1361 const char *pcszId)
1362{
1363 int rc = VINF_SUCCESS;
1364 PPDMBLKCACHE pBlkCache;
1365
1366 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1367 if (RT_SUCCESS(rc))
1368 {
1369 pBlkCache->enmType = PDMBLKCACHETYPE_USB;
1370 pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete;
1371 pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue;
1372 pBlkCache->u.Usb.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1373 pBlkCache->u.Usb.pUsbIns = pUsbIns;
1374 *ppBlkCache = pBlkCache;
1375 }
1376
1377 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1378 return rc;
1379
1380}
1381
1382VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache,
1383 PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete,
1384 PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue,
1385 PFNPDMBLKCACHEXFERENQUEUEDISCARDINT pfnXferEnqueueDiscard,
1386 const char *pcszId)
1387{
1388 int rc = VINF_SUCCESS;
1389 PPDMBLKCACHE pBlkCache;
1390
1391 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1392 if (RT_SUCCESS(rc))
1393 {
1394 pBlkCache->enmType = PDMBLKCACHETYPE_INTERNAL;
1395 pBlkCache->u.Int.pfnXferComplete = pfnXferComplete;
1396 pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue;
1397 pBlkCache->u.Int.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1398 pBlkCache->u.Int.pvUser = pvUser;
1399 *ppBlkCache = pBlkCache;
1400 }
1401
1402 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1403 return rc;
1404
1405}
1406
1407/**
1408 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1409 *
1410 * @returns IPRT status code.
1411 * @param pNode The node to destroy.
1412 * @param pvUser Opaque user data.
1413 */
1414static DECLCALLBACK(int) pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser)
1415{
1416 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
1417 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
1418 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
1419
1420 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
1421 {
1422 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
1423 pdmBlkCacheEntryRef(pEntry);
1424 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1425 pdmBlkCacheLockLeave(pCache);
1426
1427 RTThreadSleep(250);
1428
1429 /* Re-enter all locks */
1430 pdmBlkCacheLockEnter(pCache);
1431 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1432 pdmBlkCacheEntryRelease(pEntry);
1433 }
1434
1435 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
1436 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1437
1438 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1439 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1440
1441 pdmBlkCacheEntryRemoveFromList(pEntry);
1442
1443 if (fUpdateCache)
1444 pdmBlkCacheSub(pCache, pEntry->cbData);
1445
1446 RTMemPageFree(pEntry->pbData, pEntry->cbData);
1447 RTMemFree(pEntry);
1448
1449 return VINF_SUCCESS;
1450}
1451
1452/**
1453 * Destroys all cache resources used by the given endpoint.
1454 *
1455 * @returns nothing.
1456 * @param pBlkCache Block cache handle.
1457 */
1458VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache)
1459{
1460 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1461
1462 /*
1463 * Commit all dirty entries now (they are waited on for completion during the
1464 * destruction of the AVL tree below).
1465 * The exception is if the VM was paused because of an I/O error before.
1466 */
1467 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
1468 pdmBlkCacheCommit(pBlkCache);
1469
1470 /* Make sure nobody is accessing the cache while we delete the tree. */
1471 pdmBlkCacheLockEnter(pCache);
1472 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1473 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
1474 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1475
1476 RTSpinlockDestroy(pBlkCache->LockList);
1477
1478 pCache->cRefs--;
1479 RTListNodeRemove(&pBlkCache->NodeCacheUser);
1480
1481 pdmBlkCacheLockLeave(pCache);
1482
1483 RTMemFree(pBlkCache->pTree);
1484 pBlkCache->pTree = NULL;
1485 RTSemRWDestroy(pBlkCache->SemRWEntries);
1486
1487#ifdef VBOX_WITH_STATISTICS
1488 STAMR3DeregisterF(pCache->pVM->pUVM, "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1489#endif
1490
1491 RTStrFree(pBlkCache->pszId);
1492 RTMemFree(pBlkCache);
1493}
1494
1495VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns)
1496{
1497 LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns));
1498
1499 /*
1500 * Validate input.
1501 */
1502 if (!pDevIns)
1503 return;
1504 VM_ASSERT_EMT(pVM);
1505
1506 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1507 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1508
1509 /* Return silently if not supported. */
1510 if (!pBlkCacheGlobal)
1511 return;
1512
1513 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1514
1515 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1516 {
1517 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV
1518 && pBlkCache->u.Dev.pDevIns == pDevIns)
1519 PDMR3BlkCacheRelease(pBlkCache);
1520 }
1521
1522 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1523}
1524
1525VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns)
1526{
1527 LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns));
1528
1529 /*
1530 * Validate input.
1531 */
1532 if (!pDrvIns)
1533 return;
1534 VM_ASSERT_EMT(pVM);
1535
1536 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1537 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1538
1539 /* Return silently if not supported. */
1540 if (!pBlkCacheGlobal)
1541 return;
1542
1543 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1544
1545 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1546 {
1547 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV
1548 && pBlkCache->u.Drv.pDrvIns == pDrvIns)
1549 PDMR3BlkCacheRelease(pBlkCache);
1550 }
1551
1552 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1553}
1554
1555VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns)
1556{
1557 LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns));
1558
1559 /*
1560 * Validate input.
1561 */
1562 if (!pUsbIns)
1563 return;
1564 VM_ASSERT_EMT(pVM);
1565
1566 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1567 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1568
1569 /* Return silently if not supported. */
1570 if (!pBlkCacheGlobal)
1571 return;
1572
1573 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1574
1575 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1576 {
1577 if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB
1578 && pBlkCache->u.Usb.pUsbIns == pUsbIns)
1579 PDMR3BlkCacheRelease(pBlkCache);
1580 }
1581
1582 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1583}
1584
1585static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off)
1586{
1587 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1588
1589 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1590 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off);
1591 if (pEntry)
1592 pdmBlkCacheEntryRef(pEntry);
1593 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1594
1595 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1596
1597 return pEntry;
1598}
1599
1600/**
1601 * Return the best fit cache entries for the given offset.
1602 *
1603 * @returns nothing.
1604 * @param pBlkCache The endpoint cache.
1605 * @param off The offset.
1606 * @param ppEntryAbove Where to store the pointer to the best fit entry above
1607 * the given offset. NULL if not required.
1608 */
1609static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off, PPDMBLKCACHEENTRY *ppEntryAbove)
1610{
1611 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1612
1613 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1614 if (ppEntryAbove)
1615 {
1616 *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/);
1617 if (*ppEntryAbove)
1618 pdmBlkCacheEntryRef(*ppEntryAbove);
1619 }
1620
1621 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1622
1623 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1624}
1625
1626static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
1627{
1628 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeInsert, Cache);
1629 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1630 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1631 AssertMsg(fInserted, ("Node was not inserted into tree\n")); NOREF(fInserted);
1632 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeInsert, Cache);
1633 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1634}
1635
1636/**
1637 * Allocates and initializes a new entry for the cache.
1638 * The entry has a reference count of 1.
1639 *
1640 * @returns Pointer to the new cache entry or NULL if out of memory.
1641 * @param pBlkCache The cache the entry belongs to.
1642 * @param off Start offset.
1643 * @param cbData Size of the cache entry.
1644 * @param pbBuffer Pointer to the buffer to use.
1645 * NULL if a new buffer should be allocated.
1646 * The buffer needs to have the same size of the entry.
1647 */
1648static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbData, uint8_t *pbBuffer)
1649{
1650 AssertReturn(cbData <= UINT32_MAX, NULL);
1651 PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY));
1652
1653 if (RT_UNLIKELY(!pEntryNew))
1654 return NULL;
1655
1656 pEntryNew->Core.Key = off;
1657 pEntryNew->Core.KeyLast = off + cbData - 1;
1658 pEntryNew->pBlkCache = pBlkCache;
1659 pEntryNew->fFlags = 0;
1660 pEntryNew->cRefs = 1; /* We are using it now. */
1661 pEntryNew->pList = NULL;
1662 pEntryNew->cbData = (uint32_t)cbData;
1663 pEntryNew->pWaitingHead = NULL;
1664 pEntryNew->pWaitingTail = NULL;
1665 if (pbBuffer)
1666 pEntryNew->pbData = pbBuffer;
1667 else
1668 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1669
1670 if (RT_UNLIKELY(!pEntryNew->pbData))
1671 {
1672 RTMemFree(pEntryNew);
1673 return NULL;
1674 }
1675
1676 return pEntryNew;
1677}
1678
1679/**
1680 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1681 * in exclusive mode.
1682 *
1683 * @returns true if the flag in fSet is set and the one in fClear is clear.
1684 * false otherwise.
1685 * The R/W semaphore is only held if true is returned.
1686 *
1687 * @param pBlkCache The endpoint cache instance data.
1688 * @param pEntry The entry to check the flags for.
1689 * @param fSet The flag which is tested to be set.
1690 * @param fClear The flag which is tested to be clear.
1691 */
1692DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache,
1693 PPDMBLKCACHEENTRY pEntry,
1694 uint32_t fSet, uint32_t fClear)
1695{
1696 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1697 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1698
1699 if (fPassed)
1700 {
1701 /* Acquire the lock and check again because the completion callback might have raced us. */
1702 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1703
1704 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1705 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1706
1707 /* Drop the lock if we didn't passed the test. */
1708 if (!fPassed)
1709 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1710 }
1711
1712 return fPassed;
1713}
1714
1715/**
1716 * Adds a segment to the waiting list for a cache entry
1717 * which is currently in progress.
1718 *
1719 * @returns nothing.
1720 * @param pEntry The cache entry to add the segment to.
1721 * @param pWaiter The waiter entry to add.
1722 */
1723DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry,
1724 PPDMBLKCACHEWAITER pWaiter)
1725{
1726 pWaiter->pNext = NULL;
1727
1728 if (pEntry->pWaitingHead)
1729 {
1730 AssertPtr(pEntry->pWaitingTail);
1731
1732 pEntry->pWaitingTail->pNext = pWaiter;
1733 pEntry->pWaitingTail = pWaiter;
1734 }
1735 else
1736 {
1737 Assert(!pEntry->pWaitingTail);
1738
1739 pEntry->pWaitingHead = pWaiter;
1740 pEntry->pWaitingTail = pWaiter;
1741 }
1742}
1743
1744/**
1745 * Add a buffer described by the I/O memory context
1746 * to the entry waiting for completion.
1747 *
1748 * @returns VBox status code.
1749 * @param pEntry The entry to add the buffer to.
1750 * @param pReq The request.
1751 * @param pSgBuf The scatter/gather buffer. Will be advanced by cbData.
1752 * @param offDiff Offset from the start of the buffer in the entry.
1753 * @param cbData Amount of data to wait for onthis entry.
1754 * @param fWrite Flag whether the task waits because it wants to write to
1755 * the cache entry.
1756 */
1757static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry, PPDMBLKCACHEREQ pReq,
1758 PRTSGBUF pSgBuf, uint64_t offDiff, size_t cbData, bool fWrite)
1759{
1760 PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER));
1761 if (!pWaiter)
1762 return VERR_NO_MEMORY;
1763
1764 ASMAtomicIncU32(&pReq->cXfersPending);
1765 pWaiter->pReq = pReq;
1766 pWaiter->offCacheEntry = offDiff;
1767 pWaiter->cbTransfer = cbData;
1768 pWaiter->fWrite = fWrite;
1769 RTSgBufClone(&pWaiter->SgBuf, pSgBuf);
1770 RTSgBufAdvance(pSgBuf, cbData);
1771
1772 pdmBlkCacheEntryAddWaiter(pEntry, pWaiter);
1773
1774 return VINF_SUCCESS;
1775}
1776
1777/**
1778 * Calculate aligned offset and size for a new cache entry which do not
1779 * intersect with an already existing entry and the file end.
1780 *
1781 * @returns The number of bytes the entry can hold of the requested amount
1782 * of bytes.
1783 * @param pBlkCache The endpoint cache.
1784 * @param off The start offset.
1785 * @param cb The number of bytes the entry needs to hold at
1786 * least.
1787 * @param pcbEntry Where to store the number of bytes the entry can hold.
1788 * Can be less than given because of other entries.
1789 */
1790static uint32_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache,
1791 uint64_t off, uint32_t cb,
1792 uint32_t *pcbEntry)
1793{
1794 /* Get the best fit entries around the offset */
1795 PPDMBLKCACHEENTRY pEntryAbove = NULL;
1796 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
1797
1798 /* Log the info */
1799 LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1800 pEntryAbove ? "B" : "No b",
1801 off,
1802 pEntryAbove ? pEntryAbove->Core.Key : 0,
1803 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1804 pEntryAbove ? pEntryAbove->cbData : 0));
1805
1806 uint32_t cbNext;
1807 uint32_t cbInEntry;
1808 if ( pEntryAbove
1809 && off + cb > pEntryAbove->Core.Key)
1810 {
1811 cbInEntry = (uint32_t)(pEntryAbove->Core.Key - off);
1812 cbNext = (uint32_t)(pEntryAbove->Core.Key - off);
1813 }
1814 else
1815 {
1816 cbInEntry = cb;
1817 cbNext = cb;
1818 }
1819
1820 /* A few sanity checks */
1821 AssertMsg(!pEntryAbove || off + cbNext <= pEntryAbove->Core.Key,
1822 ("Aligned size intersects with another cache entry\n"));
1823 Assert(cbInEntry <= cbNext);
1824
1825 if (pEntryAbove)
1826 pdmBlkCacheEntryRelease(pEntryAbove);
1827
1828 LogFlow(("off=%llu cbNext=%u\n", off, cbNext));
1829
1830 *pcbEntry = cbNext;
1831
1832 return cbInEntry;
1833}
1834
1835/**
1836 * Create a new cache entry evicting data from the cache if required.
1837 *
1838 * @returns Pointer to the new cache entry or NULL
1839 * if not enough bytes could be evicted from the cache.
1840 * @param pBlkCache The endpoint cache.
1841 * @param off The offset.
1842 * @param cb Number of bytes the cache entry should have.
1843 * @param pcbData Where to store the number of bytes the new
1844 * entry can hold. May be lower than actually
1845 * requested due to another entry intersecting the
1846 * access range.
1847 */
1848static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cb, size_t *pcbData)
1849{
1850 uint32_t cbEntry = 0;
1851
1852 *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, (uint32_t)cb, &cbEntry);
1853 AssertReturn(cb <= UINT32_MAX, NULL);
1854
1855 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1856 pdmBlkCacheLockEnter(pCache);
1857
1858 PPDMBLKCACHEENTRY pEntryNew = NULL;
1859 uint8_t *pbBuffer = NULL;
1860 bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1861 if (fEnough)
1862 {
1863 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1864
1865 pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, pbBuffer);
1866 if (RT_LIKELY(pEntryNew))
1867 {
1868 pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1869 pdmBlkCacheAdd(pCache, cbEntry);
1870 pdmBlkCacheLockLeave(pCache);
1871
1872 pdmBlkCacheInsertEntry(pBlkCache, pEntryNew);
1873
1874 AssertMsg( (off >= pEntryNew->Core.Key)
1875 && (off + *pcbData <= pEntryNew->Core.KeyLast + 1),
1876 ("Overflow in calculation off=%llu\n", off));
1877 }
1878 else
1879 pdmBlkCacheLockLeave(pCache);
1880 }
1881 else
1882 pdmBlkCacheLockLeave(pCache);
1883
1884 return pEntryNew;
1885}
1886
1887static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(void *pvUser)
1888{
1889 PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ));
1890
1891 if (RT_LIKELY(pReq))
1892 {
1893 pReq->pvUser = pvUser;
1894 pReq->rcReq = VINF_SUCCESS;
1895 pReq->cXfersPending = 0;
1896 }
1897
1898 return pReq;
1899}
1900
1901static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq)
1902{
1903 switch (pBlkCache->enmType)
1904 {
1905 case PDMBLKCACHETYPE_DEV:
1906 {
1907 pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns,
1908 pReq->pvUser, pReq->rcReq);
1909 break;
1910 }
1911 case PDMBLKCACHETYPE_DRV:
1912 {
1913 pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns,
1914 pReq->pvUser, pReq->rcReq);
1915 break;
1916 }
1917 case PDMBLKCACHETYPE_USB:
1918 {
1919 pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns,
1920 pReq->pvUser, pReq->rcReq);
1921 break;
1922 }
1923 case PDMBLKCACHETYPE_INTERNAL:
1924 {
1925 pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser,
1926 pReq->pvUser, pReq->rcReq);
1927 break;
1928 }
1929 default:
1930 AssertMsgFailed(("Unknown block cache type!\n"));
1931 }
1932
1933 RTMemFree(pReq);
1934}
1935
1936static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
1937 int rcReq, bool fCallHandler)
1938{
1939 if (RT_FAILURE(rcReq))
1940 ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS);
1941
1942 AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n"));
1943 uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending);
1944
1945 if (!cXfersPending)
1946 {
1947 if (fCallHandler)
1948 pdmBlkCacheReqComplete(pBlkCache, pReq);
1949 return true;
1950 }
1951
1952 LogFlowFunc(("pReq=%#p cXfersPending=%u\n", pReq, cXfersPending));
1953 return false;
1954}
1955
1956VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off,
1957 PCRTSGBUF pSgBuf, size_t cbRead, void *pvUser)
1958{
1959 int rc = VINF_SUCCESS;
1960 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1961 PPDMBLKCACHEENTRY pEntry;
1962 PPDMBLKCACHEREQ pReq;
1963
1964 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbRead=%u pvUser=%#p\n",
1965 pBlkCache, pBlkCache->pszId, off, pSgBuf, cbRead, pvUser));
1966
1967 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
1968 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
1969
1970 RTSGBUF SgBuf;
1971 RTSgBufClone(&SgBuf, pSgBuf);
1972
1973 /* Allocate new request structure. */
1974 pReq = pdmBlkCacheReqAlloc(pvUser);
1975 if (RT_UNLIKELY(!pReq))
1976 return VERR_NO_MEMORY;
1977
1978 /* Increment data transfer counter to keep the request valid while we access it. */
1979 ASMAtomicIncU32(&pReq->cXfersPending);
1980
1981 while (cbRead)
1982 {
1983 size_t cbToRead;
1984
1985 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1986
1987 /*
1988 * If there is no entry we try to create a new one eviciting unused pages
1989 * if the cache is full. If this is not possible we will pass the request through
1990 * and skip the caching (all entries may be still in progress so they can't
1991 * be evicted)
1992 * If we have an entry it can be in one of the LRU lists where the entry
1993 * contains data (recently used or frequently used LRU) so we can just read
1994 * the data we need and put the entry at the head of the frequently used LRU list.
1995 * In case the entry is in one of the ghost lists it doesn't contain any data.
1996 * We have to fetch it again evicting pages from either T1 or T2 to make room.
1997 */
1998 if (pEntry)
1999 {
2000 uint64_t offDiff = off - pEntry->Core.Key;
2001
2002 AssertMsg(off >= pEntry->Core.Key,
2003 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
2004 off, pEntry->Core.Key));
2005
2006 AssertPtr(pEntry->pList);
2007
2008 cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead);
2009
2010 AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
2011 ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n",
2012 off, cbToRead));
2013
2014 cbRead -= cbToRead;
2015
2016 if (!cbRead)
2017 STAM_COUNTER_INC(&pCache->cHits);
2018 else
2019 STAM_COUNTER_INC(&pCache->cPartialHits);
2020
2021 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
2022
2023 /* Ghost lists contain no data. */
2024 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2025 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2026 {
2027 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2028 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2029 PDMBLKCACHE_ENTRY_IS_DIRTY))
2030 {
2031 /* Entry didn't completed yet. Append to the list */
2032 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2033 &SgBuf, offDiff, cbToRead,
2034 false /* fWrite */);
2035 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2036 }
2037 else
2038 {
2039 /* Read as much as we can from the entry. */
2040 RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead);
2041 }
2042
2043 /* Move this entry to the top position */
2044 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2045 {
2046 pdmBlkCacheLockEnter(pCache);
2047 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2048 pdmBlkCacheLockLeave(pCache);
2049 }
2050 /* Release the entry */
2051 pdmBlkCacheEntryRelease(pEntry);
2052 }
2053 else
2054 {
2055 uint8_t *pbBuffer = NULL;
2056
2057 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
2058
2059 pdmBlkCacheLockEnter(pCache);
2060 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2061 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2062
2063 /* Move the entry to Am and fetch it to the cache. */
2064 if (fEnough)
2065 {
2066 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2067 pdmBlkCacheAdd(pCache, pEntry->cbData);
2068 pdmBlkCacheLockLeave(pCache);
2069
2070 if (pbBuffer)
2071 pEntry->pbData = pbBuffer;
2072 else
2073 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2074 AssertPtr(pEntry->pbData);
2075
2076 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2077 &SgBuf, offDiff, cbToRead,
2078 false /* fWrite */);
2079 pdmBlkCacheEntryReadFromMedium(pEntry);
2080 /* Release the entry */
2081 pdmBlkCacheEntryRelease(pEntry);
2082 }
2083 else
2084 {
2085 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2086 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2087 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2088 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2089 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2090
2091 pdmBlkCacheLockLeave(pCache);
2092
2093 RTMemFree(pEntry);
2094
2095 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2096 &SgBuf, off, cbToRead,
2097 PDMBLKCACHEXFERDIR_READ);
2098 }
2099 }
2100 }
2101 else
2102 {
2103#ifdef VBOX_WITH_IO_READ_CACHE
2104 /* No entry found for this offset. Create a new entry and fetch the data to the cache. */
2105 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2106 off, cbRead,
2107 &cbToRead);
2108
2109 cbRead -= cbToRead;
2110
2111 if (pEntryNew)
2112 {
2113 if (!cbRead)
2114 STAM_COUNTER_INC(&pCache->cMisses);
2115 else
2116 STAM_COUNTER_INC(&pCache->cPartialHits);
2117
2118 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2119 &SgBuf,
2120 off - pEntryNew->Core.Key,
2121 cbToRead,
2122 false /* fWrite */);
2123 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2124 pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
2125 }
2126 else
2127 {
2128 /*
2129 * There is not enough free space in the cache.
2130 * Pass the request directly to the I/O manager.
2131 */
2132 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
2133
2134 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2135 &SgBuf, off, cbToRead,
2136 PDMBLKCACHEXFERDIR_READ);
2137 }
2138#else
2139 /* Clip read size if necessary. */
2140 PPDMBLKCACHEENTRY pEntryAbove;
2141 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
2142
2143 if (pEntryAbove)
2144 {
2145 if (off + cbRead > pEntryAbove->Core.Key)
2146 cbToRead = pEntryAbove->Core.Key - off;
2147 else
2148 cbToRead = cbRead;
2149
2150 pdmBlkCacheEntryRelease(pEntryAbove);
2151 }
2152 else
2153 cbToRead = cbRead;
2154
2155 cbRead -= cbToRead;
2156 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2157 &SgBuf, off, cbToRead,
2158 PDMBLKCACHEXFERDIR_READ);
2159#endif
2160 }
2161 off += cbToRead;
2162 }
2163
2164 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2165 rc = VINF_AIO_TASK_PENDING;
2166 else
2167 {
2168 rc = pReq->rcReq;
2169 RTMemFree(pReq);
2170 }
2171
2172 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2173
2174 return rc;
2175}
2176
2177VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off, PCRTSGBUF pSgBuf, size_t cbWrite, void *pvUser)
2178{
2179 int rc = VINF_SUCCESS;
2180 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2181 PPDMBLKCACHEENTRY pEntry;
2182 PPDMBLKCACHEREQ pReq;
2183
2184 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbWrite=%u pvUser=%#p\n",
2185 pBlkCache, pBlkCache->pszId, off, pSgBuf, cbWrite, pvUser));
2186
2187 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2188 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2189
2190 RTSGBUF SgBuf;
2191 RTSgBufClone(&SgBuf, pSgBuf);
2192
2193 /* Allocate new request structure. */
2194 pReq = pdmBlkCacheReqAlloc(pvUser);
2195 if (RT_UNLIKELY(!pReq))
2196 return VERR_NO_MEMORY;
2197
2198 /* Increment data transfer counter to keep the request valid while we access it. */
2199 ASMAtomicIncU32(&pReq->cXfersPending);
2200
2201 while (cbWrite)
2202 {
2203 size_t cbToWrite;
2204
2205 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
2206 if (pEntry)
2207 {
2208 /* Write the data into the entry and mark it as dirty */
2209 AssertPtr(pEntry->pList);
2210
2211 uint64_t offDiff = off - pEntry->Core.Key;
2212 AssertMsg(off >= pEntry->Core.Key, ("Overflow in calculation off=%llu OffsetAligned=%llu\n", off, pEntry->Core.Key));
2213
2214 cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite);
2215 cbWrite -= cbToWrite;
2216
2217 if (!cbWrite)
2218 STAM_COUNTER_INC(&pCache->cHits);
2219 else
2220 STAM_COUNTER_INC(&pCache->cPartialHits);
2221
2222 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2223
2224 /* Ghost lists contain no data. */
2225 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2226 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2227 {
2228 /* Check if the entry is dirty. */
2229 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2230 PDMBLKCACHE_ENTRY_IS_DIRTY,
2231 0))
2232 {
2233 /* If it is already dirty but not in progress just update the data. */
2234 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2235 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2236 else
2237 {
2238 /* The data isn't written to the file yet */
2239 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2240 &SgBuf, offDiff, cbToWrite,
2241 true /* fWrite */);
2242 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2243 }
2244
2245 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2246 }
2247 else /* Dirty bit not set */
2248 {
2249 /*
2250 * Check if a read is in progress for this entry.
2251 * We have to defer processing in that case.
2252 */
2253 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2254 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2255 0))
2256 {
2257 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2258 &SgBuf, offDiff, cbToWrite,
2259 true /* fWrite */);
2260 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2261 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2262 }
2263 else /* I/O in progress flag not set */
2264 {
2265 /* Write as much as we can into the entry and update the file. */
2266 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2267
2268 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2269 if (fCommit)
2270 pdmBlkCacheCommitDirtyEntries(pCache);
2271 }
2272 } /* Dirty bit not set */
2273
2274 /* Move this entry to the top position */
2275 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2276 {
2277 pdmBlkCacheLockEnter(pCache);
2278 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2279 pdmBlkCacheLockLeave(pCache);
2280 }
2281
2282 pdmBlkCacheEntryRelease(pEntry);
2283 }
2284 else /* Entry is on the ghost list */
2285 {
2286 uint8_t *pbBuffer = NULL;
2287
2288 pdmBlkCacheLockEnter(pCache);
2289 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2290 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2291
2292 if (fEnough)
2293 {
2294 /* Move the entry to Am and fetch it to the cache. */
2295 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2296 pdmBlkCacheAdd(pCache, pEntry->cbData);
2297 pdmBlkCacheLockLeave(pCache);
2298
2299 if (pbBuffer)
2300 pEntry->pbData = pbBuffer;
2301 else
2302 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2303 AssertPtr(pEntry->pbData);
2304
2305 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2306 &SgBuf, offDiff, cbToWrite,
2307 true /* fWrite */);
2308 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2309 pdmBlkCacheEntryReadFromMedium(pEntry);
2310
2311 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2312 pdmBlkCacheEntryRelease(pEntry);
2313 }
2314 else
2315 {
2316 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2317 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2318 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2319 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2320 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2321
2322 pdmBlkCacheLockLeave(pCache);
2323
2324 RTMemFree(pEntry);
2325 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2326 &SgBuf, off, cbToWrite,
2327 PDMBLKCACHEXFERDIR_WRITE);
2328 }
2329 }
2330 }
2331 else /* No entry found */
2332 {
2333 /*
2334 * No entry found. Try to create a new cache entry to store the data in and if that fails
2335 * write directly to the file.
2336 */
2337 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2338 off, cbWrite,
2339 &cbToWrite);
2340
2341 cbWrite -= cbToWrite;
2342
2343 if (pEntryNew)
2344 {
2345 uint64_t offDiff = off - pEntryNew->Core.Key;
2346
2347 STAM_COUNTER_INC(&pCache->cHits);
2348
2349 /*
2350 * Check if it is possible to just write the data without waiting
2351 * for it to get fetched first.
2352 */
2353 if (!offDiff && pEntryNew->cbData == cbToWrite)
2354 {
2355 RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite);
2356
2357 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew);
2358 if (fCommit)
2359 pdmBlkCacheCommitDirtyEntries(pCache);
2360 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2361 }
2362 else
2363 {
2364 /* Defer the write and fetch the data from the endpoint. */
2365 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2366 &SgBuf, offDiff, cbToWrite,
2367 true /* fWrite */);
2368 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2369 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2370 }
2371
2372 pdmBlkCacheEntryRelease(pEntryNew);
2373 }
2374 else
2375 {
2376 /*
2377 * There is not enough free space in the cache.
2378 * Pass the request directly to the I/O manager.
2379 */
2380 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2381
2382 STAM_COUNTER_INC(&pCache->cMisses);
2383
2384 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2385 &SgBuf, off, cbToWrite,
2386 PDMBLKCACHEXFERDIR_WRITE);
2387 }
2388 }
2389
2390 off += cbToWrite;
2391 }
2392
2393 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2394 rc = VINF_AIO_TASK_PENDING;
2395 else
2396 {
2397 rc = pReq->rcReq;
2398 RTMemFree(pReq);
2399 }
2400
2401 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2402
2403 return rc;
2404}
2405
2406VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser)
2407{
2408 int rc = VINF_SUCCESS;
2409 PPDMBLKCACHEREQ pReq;
2410
2411 LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId));
2412
2413 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2414 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2415
2416 /* Commit dirty entries in the cache. */
2417 pdmBlkCacheCommit(pBlkCache);
2418
2419 /* Allocate new request structure. */
2420 pReq = pdmBlkCacheReqAlloc(pvUser);
2421 if (RT_UNLIKELY(!pReq))
2422 return VERR_NO_MEMORY;
2423
2424 rc = pdmBlkCacheRequestPassthrough(pBlkCache, pReq, NULL, 0, 0,
2425 PDMBLKCACHEXFERDIR_FLUSH);
2426 AssertRC(rc);
2427
2428 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2429 return VINF_AIO_TASK_PENDING;
2430}
2431
2432VMMR3DECL(int) PDMR3BlkCacheDiscard(PPDMBLKCACHE pBlkCache, PCRTRANGE paRanges,
2433 unsigned cRanges, void *pvUser)
2434{
2435 int rc = VINF_SUCCESS;
2436 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2437 PPDMBLKCACHEENTRY pEntry;
2438 PPDMBLKCACHEREQ pReq;
2439
2440 LogFlowFunc((": pBlkCache=%#p{%s} paRanges=%#p cRanges=%u pvUser=%#p\n",
2441 pBlkCache, pBlkCache->pszId, paRanges, cRanges, pvUser));
2442
2443 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2444 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2445
2446 /* Allocate new request structure. */
2447 pReq = pdmBlkCacheReqAlloc(pvUser);
2448 if (RT_UNLIKELY(!pReq))
2449 return VERR_NO_MEMORY;
2450
2451 /* Increment data transfer counter to keep the request valid while we access it. */
2452 ASMAtomicIncU32(&pReq->cXfersPending);
2453
2454 for (unsigned i = 0; i < cRanges; i++)
2455 {
2456 uint64_t offCur = paRanges[i].offStart;
2457 size_t cbLeft = paRanges[i].cbRange;
2458
2459 while (cbLeft)
2460 {
2461 size_t cbThisDiscard = 0;
2462
2463 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, offCur);
2464
2465 if (pEntry)
2466 {
2467 /* Write the data into the entry and mark it as dirty */
2468 AssertPtr(pEntry->pList);
2469
2470 uint64_t offDiff = offCur - pEntry->Core.Key;
2471
2472 AssertMsg(offCur >= pEntry->Core.Key,
2473 ("Overflow in calculation offCur=%llu OffsetAligned=%llu\n",
2474 offCur, pEntry->Core.Key));
2475
2476 cbThisDiscard = RT_MIN(pEntry->cbData - offDiff, cbLeft);
2477
2478 /* Ghost lists contain no data. */
2479 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2480 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2481 {
2482 /* Check if the entry is dirty. */
2483 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2484 PDMBLKCACHE_ENTRY_IS_DIRTY,
2485 0))
2486 {
2487 /* If it is dirty but not yet in progress remove it. */
2488 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2489 {
2490 pdmBlkCacheLockEnter(pCache);
2491 pdmBlkCacheEntryRemoveFromList(pEntry);
2492
2493 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2494 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2495 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2496
2497 pdmBlkCacheLockLeave(pCache);
2498
2499 RTMemFree(pEntry);
2500 }
2501 else
2502 {
2503#if 0
2504 /* The data isn't written to the file yet */
2505 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2506 &SgBuf, offDiff, cbToWrite,
2507 true /* fWrite */);
2508 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2509#endif
2510 }
2511
2512 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2513 pdmBlkCacheEntryRelease(pEntry);
2514 }
2515 else /* Dirty bit not set */
2516 {
2517 /*
2518 * Check if a read is in progress for this entry.
2519 * We have to defer processing in that case.
2520 */
2521 if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2522 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2523 0))
2524 {
2525#if 0
2526 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2527 &SgBuf, offDiff, cbToWrite,
2528 true /* fWrite */);
2529#endif
2530 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2531 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2532 pdmBlkCacheEntryRelease(pEntry);
2533 }
2534 else /* I/O in progress flag not set */
2535 {
2536 pdmBlkCacheLockEnter(pCache);
2537 pdmBlkCacheEntryRemoveFromList(pEntry);
2538
2539 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2540 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2541 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2542 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2543 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2544
2545 pdmBlkCacheLockLeave(pCache);
2546
2547 RTMemFree(pEntry);
2548 }
2549 } /* Dirty bit not set */
2550 }
2551 else /* Entry is on the ghost list just remove cache entry. */
2552 {
2553 pdmBlkCacheLockEnter(pCache);
2554 pdmBlkCacheEntryRemoveFromList(pEntry);
2555
2556 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2557 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2558 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2559 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2560 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2561
2562 pdmBlkCacheLockLeave(pCache);
2563
2564 RTMemFree(pEntry);
2565 }
2566 }
2567 /* else: no entry found. */
2568
2569 offCur += cbThisDiscard;
2570 cbLeft -= cbThisDiscard;
2571 }
2572 }
2573
2574 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2575 rc = VINF_AIO_TASK_PENDING;
2576 else
2577 {
2578 rc = pReq->rcReq;
2579 RTMemFree(pReq);
2580 }
2581
2582 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2583
2584 return rc;
2585}
2586
2587/**
2588 * Completes a task segment freeing all resources and completes the task handle
2589 * if everything was transferred.
2590 *
2591 * @returns Next task segment handle.
2592 * @param pBlkCache The endpoint block cache.
2593 * @param pWaiter Task segment to complete.
2594 * @param rc Status code to set.
2595 */
2596static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEWAITER pWaiter, int rc)
2597{
2598 PPDMBLKCACHEWAITER pNext = pWaiter->pNext;
2599 PPDMBLKCACHEREQ pReq = pWaiter->pReq;
2600
2601 pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, true);
2602
2603 RTMemFree(pWaiter);
2604
2605 return pNext;
2606}
2607
2608static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2609{
2610 PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry;
2611 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2612
2613 /* Reference the entry now as we are clearing the I/O in progress flag
2614 * which protected the entry till now. */
2615 pdmBlkCacheEntryRef(pEntry);
2616
2617 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2618 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
2619
2620 /* Process waiting segment list. The data in entry might have changed in-between. */
2621 bool fDirty = false;
2622 PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead;
2623 PPDMBLKCACHEWAITER pCurr = pComplete;
2624
2625 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
2626 ("The list tail was not updated correctly\n"));
2627 pEntry->pWaitingTail = NULL;
2628 pEntry->pWaitingHead = NULL;
2629
2630 if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE)
2631 {
2632 /*
2633 * An error here is difficult to handle as the original request completed already.
2634 * The error is logged for now and the VM is paused.
2635 * If the user continues the entry is written again in the hope
2636 * the user fixed the problem and the next write succeeds.
2637 */
2638 if (RT_FAILURE(rcIoXfer))
2639 {
2640 LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\" (rc=%Rrc)\n",
2641 pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId, rcIoXfer));
2642
2643 if (!ASMAtomicXchgBool(&pCache->fIoErrorVmSuspended, true))
2644 {
2645 int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR",
2646 N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc). "
2647 "Make sure there is enough free space on the disk and that the disk is working properly. "
2648 "Operation can be resumed afterwards"),
2649 pBlkCache->pszId, rcIoXfer);
2650 AssertRC(rc);
2651 }
2652
2653 /* Mark the entry as dirty again to get it added to the list later on. */
2654 fDirty = true;
2655 }
2656
2657 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY;
2658
2659 while (pCurr)
2660 {
2661 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
2662
2663 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2664 fDirty = true;
2665 pCurr = pCurr->pNext;
2666 }
2667 }
2668 else
2669 {
2670 AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n"));
2671 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY),
2672 ("Invalid flags set\n"));
2673
2674 while (pCurr)
2675 {
2676 if (pCurr->fWrite)
2677 {
2678 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2679 fDirty = true;
2680 }
2681 else
2682 RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2683
2684 pCurr = pCurr->pNext;
2685 }
2686 }
2687
2688 bool fCommit = false;
2689 if (fDirty)
2690 fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2691
2692 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2693
2694 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
2695 pdmBlkCacheEntryRelease(pEntry);
2696
2697 if (fCommit)
2698 pdmBlkCacheCommitDirtyEntries(pCache);
2699
2700 /* Complete waiters now. */
2701 while (pComplete)
2702 pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer);
2703}
2704
2705VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2706{
2707 LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer));
2708
2709 if (hIoXfer->fIoCache)
2710 pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer);
2711 else
2712 pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, rcIoXfer, true);
2713
2714 ASMAtomicDecU32(&pBlkCache->cIoXfersActive);
2715 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: I/O req %#p (%RTbool) completed (%u now active)",
2716 hIoXfer, hIoXfer->fIoCache, pBlkCache->cIoXfersActive);
2717 RTMemFree(hIoXfer);
2718}
2719
2720/**
2721 * Callback for the AVL do with all routine. Waits for a cachen entry to finish any pending I/O.
2722 *
2723 * @returns IPRT status code.
2724 * @param pNode The node to destroy.
2725 * @param pvUser Opaque user data.
2726 */
2727static DECLCALLBACK(int) pdmBlkCacheEntryQuiesce(PAVLRU64NODECORE pNode, void *pvUser)
2728{
2729 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
2730 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
2731 NOREF(pvUser);
2732
2733 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
2734 {
2735 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
2736 pdmBlkCacheEntryRef(pEntry);
2737 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2738
2739 RTThreadSleep(1);
2740
2741 /* Re-enter all locks and drop the reference. */
2742 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2743 pdmBlkCacheEntryRelease(pEntry);
2744 }
2745
2746 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
2747 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
2748
2749 return VINF_SUCCESS;
2750}
2751
2752VMMR3DECL(int) PDMR3BlkCacheSuspend(PPDMBLKCACHE pBlkCache)
2753{
2754 int rc = VINF_SUCCESS;
2755 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2756
2757 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2758
2759 if (!ASMAtomicReadBool(&pBlkCache->pCache->fIoErrorVmSuspended))
2760 pdmBlkCacheCommit(pBlkCache); /* Can issue new I/O requests. */
2761 ASMAtomicXchgBool(&pBlkCache->fSuspended, true);
2762
2763 /* Wait for all I/O to complete. */
2764 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2765 rc = RTAvlrU64DoWithAll(pBlkCache->pTree, true, pdmBlkCacheEntryQuiesce, NULL);
2766 AssertRC(rc);
2767 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2768
2769 return rc;
2770}
2771
2772VMMR3DECL(int) PDMR3BlkCacheResume(PPDMBLKCACHE pBlkCache)
2773{
2774 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2775
2776 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2777
2778 ASMAtomicXchgBool(&pBlkCache->fSuspended, false);
2779
2780 return VINF_SUCCESS;
2781}
2782
2783VMMR3DECL(int) PDMR3BlkCacheClear(PPDMBLKCACHE pBlkCache)
2784{
2785 int rc = VINF_SUCCESS;
2786 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2787
2788 /*
2789 * Commit all dirty entries now (they are waited on for completion during the
2790 * destruction of the AVL tree below).
2791 * The exception is if the VM was paused because of an I/O error before.
2792 */
2793 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
2794 pdmBlkCacheCommit(pBlkCache);
2795
2796 /* Make sure nobody is accessing the cache while we delete the tree. */
2797 pdmBlkCacheLockEnter(pCache);
2798 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2799 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
2800 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2801
2802 pdmBlkCacheLockLeave(pCache);
2803 return rc;
2804}
2805
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
Contact | Privacy policy | Terms of Use