VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileCache.cpp@25414

Last change on this file since 25414 was 25271, checked in by vboxsync, 15 years ago

PDMAsyncCompletionFileCache.cpp: warnings and don't release pEntryBestFit until after using it.

1/* $Id: PDMAsyncCompletionFileCache.cpp 25271 2009-12-09 14:55:54Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * File data cache.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22
23/** @page pg_pdm_async_completion_cache PDM Async Completion Cache - The file I/O cache
24 * This component implements an I/O cache for file endpoints based on the ARC algorithm.
25 * http://en.wikipedia.org/wiki/Adaptive_Replacement_Cache
26 *
27 * The algorithm uses four LRU (least recently used) lists to store data in the cache.
28 * Two of them contain data: one stores entries which were accessed recently and one
29 * stores entries which are accessed frequently.
30 * The other two lists are called ghost lists and store information about the accessed range
31 * but do not contain data. They are used to track data accesses. If an entry on a ghost list
32 * is accessed again, the data is pushed to a higher position in the cache, preventing it
33 * from being evicted quickly again.
34 *
35 * The algorithm needs to be modified to meet our requirements. Like the implementation
36 * for the ZFS filesystem we need to handle pages with a variable size. It would
37 * be possible to use a fixed size, but that would increase the computational
38 * and memory overhead.
39 * Because we do I/O asynchronously we also need to mark entries which are currently
40 * being accessed as non-evictable to prevent removal of the entry while the data is accessed.
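 *
 * A minimal sketch of how an entry is protected while I/O is outstanding
 * (simplified from pdmacFileCacheReadFromEndpoint() and
 * pdmacFileCacheTaskCompleted() below; error handling omitted):
 * @code
 *     pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;  // entry can't be evicted from now on
 *     pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);         // hand the request to the I/O manager
 *     // ... later, in the completion callback ...
 *     pdmacFileEpCacheEntryRef(pEntry);                       // keep a reference while updating the entry
 *     pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IO_IN_PROGRESS; // eviction is possible again ...
 *     pdmacFileEpCacheEntryRelease(pEntry);                   // ... once the reference is dropped
 * @endcode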
41 */
42
43/*******************************************************************************
44* Header Files *
45*******************************************************************************/
46#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
47#define RT_STRICT
48#include <iprt/types.h>
49#include <iprt/mem.h>
50#include <iprt/path.h>
51#include <VBox/log.h>
52#include <VBox/stam.h>
53
54#include "PDMAsyncCompletionFileInternal.h"
55
56#ifdef VBOX_STRICT
57# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
58 do \
59 { \
60 AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
61 ("Thread does not own critical section\n"));\
62 } while (0)
63#else
64# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while (0)
65#endif
66
67/*******************************************************************************
68* Internal Functions *
69*******************************************************************************/
70static void pdmacFileCacheTaskCompleted(PPDMACTASKFILE pTask, void *pvUser);
71
72DECLINLINE(void) pdmacFileEpCacheEntryRelease(PPDMACFILECACHEENTRY pEntry)
73{
74 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
75 ASMAtomicDecU32(&pEntry->cRefs);
76}
77
78DECLINLINE(void) pdmacFileEpCacheEntryRef(PPDMACFILECACHEENTRY pEntry)
79{
80 ASMAtomicIncU32(&pEntry->cRefs);
81}
82
83/**
84 * Checks consistency of a LRU list.
85 *
86 * @returns nothing
87 * @param pList The LRU list to check.
88 * @param pNotInList Element which is not allowed to occur in the list.
89 */
90static void pdmacFileCacheCheckList(PPDMACFILELRULIST pList, PPDMACFILECACHEENTRY pNotInList)
91{
92#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
93 PPDMACFILECACHEENTRY pCurr = pList->pHead;
94
95 /* Check that there are no double entries and no cycles in the list. */
96 while (pCurr)
97 {
98 PPDMACFILECACHEENTRY pNext = pCurr->pNext;
99
100 while (pNext)
101 {
102 AssertMsg(pCurr != pNext,
103 ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
104 pCurr, pList));
105 pNext = pNext->pNext;
106 }
107
108 AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr));
109
110 if (!pCurr->pNext)
111 AssertMsg(pCurr == pList->pTail, ("End of list reached but last element is not list tail\n"));
112
113 pCurr = pCurr->pNext;
114 }
115#endif
116}
117
118/**
119 * Unlinks a cache entry from the LRU list it is assigned to.
120 *
121 * @returns nothing.
122 * @param pEntry The entry to unlink.
123 */
124static void pdmacFileCacheEntryRemoveFromList(PPDMACFILECACHEENTRY pEntry)
125{
126 PPDMACFILELRULIST pList = pEntry->pList;
127 PPDMACFILECACHEENTRY pPrev, pNext;
128
129 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
130
131 AssertPtr(pList);
132 pdmacFileCacheCheckList(pList, NULL);
133
134 pPrev = pEntry->pPrev;
135 pNext = pEntry->pNext;
136
137 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
138 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
139
140 if (pPrev)
141 pPrev->pNext = pNext;
142 else
143 {
144 pList->pHead = pNext;
145
146 if (pNext)
147 pNext->pPrev = NULL;
148 }
149
150 if (pNext)
151 pNext->pPrev = pPrev;
152 else
153 {
154 pList->pTail = pPrev;
155
156 if (pPrev)
157 pPrev->pNext = NULL;
158 }
159
160 pEntry->pList = NULL;
161 pEntry->pPrev = NULL;
162 pEntry->pNext = NULL;
163 pList->cbCached -= pEntry->cbData;
164 pdmacFileCacheCheckList(pList, pEntry);
165}
166
167/**
168 * Adds a cache entry to the given LRU list unlinking it from the currently
169 * assigned list if needed.
170 *
171 * @returns nothing.
172 * @param pList The list to add the entry to.
173 * @param pEntry Entry to add.
174 */
175static void pdmacFileCacheEntryAddToList(PPDMACFILELRULIST pList, PPDMACFILECACHEENTRY pEntry)
176{
177 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
178 pdmacFileCacheCheckList(pList, NULL);
179
180 /* Remove from old list if needed */
181 if (pEntry->pList)
182 pdmacFileCacheEntryRemoveFromList(pEntry);
183
184 pEntry->pNext = pList->pHead;
185 if (pList->pHead)
186 pList->pHead->pPrev = pEntry;
187 else
188 {
189 Assert(!pList->pTail);
190 pList->pTail = pEntry;
191 }
192
193 pEntry->pPrev = NULL;
194 pList->pHead = pEntry;
195 pList->cbCached += pEntry->cbData;
196 pEntry->pList = pList;
197 pdmacFileCacheCheckList(pList, NULL);
198}
199
200/**
201 * Destroys a LRU list freeing all entries.
202 *
203 * @returns nothing
204 * @param pList Pointer to the LRU list to destroy.
205 *
206 * @note The caller must own the critical section of the cache.
207 */
208static void pdmacFileCacheDestroyList(PPDMACFILELRULIST pList)
209{
210 while (pList->pHead)
211 {
212 PPDMACFILECACHEENTRY pEntry = pList->pHead;
213
214 pList->pHead = pEntry->pNext;
215
216 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)),
217 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
218
219 RTMemPageFree(pEntry->pbData);
220 RTMemFree(pEntry);
221 }
222}
223
224/**
225 * Tries to remove the given number of bytes from a given list in the cache,
226 * moving the entries to one of the given ghost lists.
227 *
228 * @returns Amount of data which could be freed.
229 * @param pCache Pointer to the global cache data.
230 * @param cbData The amount of data to free.
231 * @param pListSrc The source list to evict data from.
232 * @param pGhostListDst The ghost list removed entries should be moved to,
233 * or NULL if the entries should be freed.
234 * @param fReuseBuffer Flag whether a buffer should be reused if it has the same size.
235 * @param ppbBuffer Where to store the address of the buffer if an entry with the
236 * same size was found and fReuseBuffer is true.
237 *
238 * @note This function may return fewer bytes than requested because entries
239 * may be marked as non-evictable if they are used for I/O at the moment.
240 */
241static size_t pdmacFileCacheEvictPagesFrom(PPDMACFILECACHEGLOBAL pCache, size_t cbData,
242 PPDMACFILELRULIST pListSrc, PPDMACFILELRULIST pGhostListDst,
243 bool fReuseBuffer, uint8_t **ppbBuffer)
244{
245 size_t cbEvicted = 0;
246
247 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
248
249 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
250#ifdef VBOX_WITH_2Q_CACHE
251 AssertMsg( !pGhostListDst
252 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
253 ("Destination list must be NULL or the recently used but paged out list\n"));
254#else
255 AssertMsg( !pGhostListDst
256 || (pGhostListDst == &pCache->LruRecentlyGhost)
257 || (pGhostListDst == &pCache->LruFrequentlyGhost),
258 ("Destination list must be NULL or one of the ghost lists\n"));
259#endif
260
261 if (fReuseBuffer)
262 {
263 AssertPtr(ppbBuffer);
264 *ppbBuffer = NULL;
265 }
266
267 /* Start deleting from the tail. */
268 PPDMACFILECACHEENTRY pEntry = pListSrc->pTail;
269
270 while ((cbEvicted < cbData) && pEntry)
271 {
272 PPDMACFILECACHEENTRY pCurr = pEntry;
273
274 pEntry = pEntry->pPrev;
275
276 /* We can't evict pages which are currently in progress */
277 if (!(pCurr->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS)
278 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
279 {
280 /* Ok eviction candidate. Grab the endpoint semaphore and check again
281 * because somebody else might have raced us. */
282 PPDMACFILEENDPOINTCACHE pEndpointCache = &pCurr->pEndpoint->DataCache;
283 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
284
285 if (!(pCurr->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS)
286 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
287 {
288 AssertMsg(!(pCurr->fFlags & PDMACFILECACHE_ENTRY_IS_DEPRECATED),
289 ("This entry is deprecated so it should have the I/O in progress flag set\n"));
290 Assert(!pCurr->pbDataReplace);
291
292 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
293
294 if (fReuseBuffer && (pCurr->cbData == cbData))
295 {
296 STAM_COUNTER_INC(&pCache->StatBuffersReused);
297 *ppbBuffer = pCurr->pbData;
298 }
299 else if (pCurr->pbData)
300 RTMemPageFree(pCurr->pbData);
301
302 pCurr->pbData = NULL;
303 cbEvicted += pCurr->cbData;
304
305 pCache->cbCached -= pCurr->cbData;
306
307 if (pGhostListDst)
308 {
309#ifdef VBOX_WITH_2Q_CACHE
310 /* We have to remove the last entries from the paged out list. */
311 while (pGhostListDst->cbCached > pCache->cbRecentlyUsedOutMax)
312 {
313 PPDMACFILECACHEENTRY pFree = pGhostListDst->pTail;
314 PPDMACFILEENDPOINTCACHE pEndpointCacheFree = &pFree->pEndpoint->DataCache;
315
316 RTSemRWRequestWrite(pEndpointCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
317
318 pdmacFileCacheEntryRemoveFromList(pFree);
319
320 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
321 RTAvlrFileOffsetRemove(pEndpointCacheFree->pTree, pFree->Core.Key);
322 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
323
324 RTSemRWReleaseWrite(pEndpointCacheFree->SemRWEntries);
325 RTMemFree(pFree);
326 }
327#endif
328
329 pdmacFileCacheEntryAddToList(pGhostListDst, pCurr);
330 }
331 else
332 {
333 /* Delete the entry from the AVL tree it is assigned to. */
334 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
335 RTAvlrFileOffsetRemove(pCurr->pEndpoint->DataCache.pTree, pCurr->Core.Key);
336 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
337
338 pdmacFileCacheEntryRemoveFromList(pCurr);
339 RTMemFree(pCurr);
340 }
341 }
342 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
343 }
344 else
345 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
346 }
347
348 return cbEvicted;
349}
350
351#ifdef VBOX_WITH_2Q_CACHE
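/**
 * Tries to make room for the given number of bytes in the cache, evicting
 * entries from the 2Q lists if necessary.
 *
 * @returns true if enough bytes could be freed (or were already free),
 *          false otherwise.
 * @param   pCache       Pointer to the global cache data.
 * @param   cbData       The number of bytes which need to fit into the cache.
 * @param   fReuseBuffer Flag whether the buffer of an evicted entry may be reused.
 * @param   ppbBuffer    Where to store the address of a reusable buffer if
 *                       fReuseBuffer is true.
 */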
352static bool pdmacFileCacheReclaim(PPDMACFILECACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
353{
354 size_t cbRemoved = 0;
355
356 if ((pCache->cbCached + cbData) < pCache->cbMax)
357 return true;
358 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
359 {
360 /* Try to evict as many bytes as possible from A1in */
361 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
362 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
363
364 /*
365 * If it was not possible to remove enough entries
366 * try the frequently accessed cache.
367 */
368 if (cbRemoved < cbData)
369 {
370 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but didn't free enough data. */
371
372 cbRemoved += pdmacFileCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
373 NULL, fReuseBuffer, ppbBuffer);
374 }
375 }
376 else
377 {
378 /* We have to remove entries from the frequently accessed list. */
379 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
380 NULL, fReuseBuffer, ppbBuffer);
381 }
382
383 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
384 return (cbRemoved >= cbData);
385}
386
387#else
388
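/**
 * ARC replace operation: evicts pages from either the recently used or the
 * frequently used list (depending on the adaptation value) and moves the
 * entries to the corresponding ghost list.
 *
 * @returns Number of bytes which could be freed.
 * @param   pCache       Pointer to the global cache data.
 * @param   cbData       The number of bytes to free.
 * @param   pEntryList   The list the entry which triggered the replacement is on.
 * @param   fReuseBuffer Flag whether the buffer of an evicted entry may be reused.
 * @param   ppbBuffer    Where to store the address of a reusable buffer.
 */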
389static size_t pdmacFileCacheReplace(PPDMACFILECACHEGLOBAL pCache, size_t cbData, PPDMACFILELRULIST pEntryList,
390 bool fReuseBuffer, uint8_t **ppbBuffer)
391{
392 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
393
394 if ( (pCache->LruRecentlyUsed.cbCached)
395 && ( (pCache->LruRecentlyUsed.cbCached > pCache->uAdaptVal)
396 || ( (pEntryList == &pCache->LruFrequentlyGhost)
397 && (pCache->LruRecentlyUsed.cbCached == pCache->uAdaptVal))))
398 {
399 /* We need to remove entry size pages from T1 and move the entries to B1 */
400 return pdmacFileCacheEvictPagesFrom(pCache, cbData,
401 &pCache->LruRecentlyUsed,
402 &pCache->LruRecentlyGhost,
403 fReuseBuffer, ppbBuffer);
404 }
405 else
406 {
407 /* We need to remove entry size pages from T2 and move the entries to B2 */
408 return pdmacFileCacheEvictPagesFrom(pCache, cbData,
409 &pCache->LruFrequentlyUsed,
410 &pCache->LruFrequentlyGhost,
411 fReuseBuffer, ppbBuffer);
412 }
413}
414
415/**
416 * Tries to evict the given amount of data from the cache.
417 *
418 * @returns Bytes removed.
419 * @param pCache The global cache data.
420 * @param cbData Number of bytes to evict.
421 */
422static size_t pdmacFileCacheEvict(PPDMACFILECACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
423{
424 size_t cbRemoved = ~0;
425
426 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
427
428 if ((pCache->LruRecentlyUsed.cbCached + pCache->LruRecentlyGhost.cbCached) >= pCache->cbMax)
429 {
430 /* Delete desired pages from the cache. */
431 if (pCache->LruRecentlyUsed.cbCached < pCache->cbMax)
432 {
433 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData,
434 &pCache->LruRecentlyGhost,
435 NULL,
436 fReuseBuffer, ppbBuffer);
437 }
438 else
439 {
440 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData,
441 &pCache->LruRecentlyUsed,
442 NULL,
443 fReuseBuffer, ppbBuffer);
444 }
445 }
446 else
447 {
448 uint32_t cbUsed = pCache->LruRecentlyUsed.cbCached + pCache->LruRecentlyGhost.cbCached +
449 pCache->LruFrequentlyUsed.cbCached + pCache->LruFrequentlyGhost.cbCached;
450
451 if (cbUsed >= pCache->cbMax)
452 {
453 if (cbUsed == 2*pCache->cbMax)
454 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData,
455 &pCache->LruFrequentlyGhost,
456 NULL,
457 fReuseBuffer, ppbBuffer);
458
459 if (cbRemoved >= cbData)
460 cbRemoved = pdmacFileCacheReplace(pCache, cbData, NULL, fReuseBuffer, ppbBuffer);
461 }
462 }
463
464 return cbRemoved;
465}
466
467/**
468 * Updates the cache parameters
469 *
470 * @returns nothing.
471 * @param pCache The global cache data.
472 * @param pEntry The entry used for the update.
473 */
474static void pdmacFileCacheUpdate(PPDMACFILECACHEGLOBAL pCache, PPDMACFILECACHEENTRY pEntry)
475{
476 int32_t uUpdateVal = 0;
477
478 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
479
480 /* Update parameters */
481 if (pEntry->pList == &pCache->LruRecentlyGhost)
482 {
483 if (pCache->LruRecentlyGhost.cbCached >= pCache->LruFrequentlyGhost.cbCached)
484 uUpdateVal = 1;
485 else
486 uUpdateVal = pCache->LruFrequentlyGhost.cbCached / pCache->LruRecentlyGhost.cbCached;
487
488 pCache->uAdaptVal = RT_MIN(pCache->uAdaptVal + uUpdateVal, pCache->cbMax);
489 }
490 else if (pEntry->pList == &pCache->LruFrequentlyGhost)
491 {
492 if (pCache->LruFrequentlyGhost.cbCached >= pCache->LruRecentlyGhost.cbCached)
493 uUpdateVal = 1;
494 else
495 uUpdateVal = pCache->LruRecentlyGhost.cbCached / pCache->LruFrequentlyGhost.cbCached;
496
497 pCache->uAdaptVal = RT_MIN(pCache->uAdaptVal - uUpdateVal, 0);
498 }
499 else
500 AssertMsgFailed(("Invalid list type\n"));
501}
502#endif
503
504/**
505 * Initiates a read I/O task for the given entry.
506 *
507 * @returns nothing.
508 * @param pEntry The entry to fetch the data to.
509 */
510static void pdmacFileCacheReadFromEndpoint(PPDMACFILECACHEENTRY pEntry)
511{
512 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
513
514 /* Make sure no one evicts the entry while it is accessed. */
515 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
516
517 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEntry->pEndpoint);
518 AssertPtr(pIoTask);
519
520 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
521
522 pIoTask->pEndpoint = pEntry->pEndpoint;
523 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_READ;
524 pIoTask->Off = pEntry->Core.Key;
525 pIoTask->DataSeg.cbSeg = pEntry->cbData;
526 pIoTask->DataSeg.pvSeg = pEntry->pbData;
527 pIoTask->pvUser = pEntry;
528 pIoTask->pfnCompleted = pdmacFileCacheTaskCompleted;
529
530 /* Send it off to the I/O manager. */
531 pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);
532}
533
534/**
535 * Initiates a write I/O task for the given entry.
536 *
537 * @returns nothing.
538 * @param pEntry The entry to read the data from.
539 */
540static void pdmacFileCacheWriteToEndpoint(PPDMACFILECACHEENTRY pEntry)
541{
542 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
543
544 /* Make sure no one evicts the entry while it is accessed. */
545 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
546
547 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEntry->pEndpoint);
548 AssertPtr(pIoTask);
549
550 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
551
552 pIoTask->pEndpoint = pEntry->pEndpoint;
553 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_WRITE;
554 pIoTask->Off = pEntry->Core.Key;
555 pIoTask->DataSeg.cbSeg = pEntry->cbData;
556 pIoTask->DataSeg.pvSeg = pEntry->pbData;
557 pIoTask->pvUser = pEntry;
558 pIoTask->pfnCompleted = pdmacFileCacheTaskCompleted;
559
560 /* Send it off to the I/O manager. */
561 pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);
562}
563
564/**
565 * Completes a task segment, freeing all resources, and completes the task handle
566 * if everything was transferred.
567 *
568 * @returns Next task segment handle.
569 * @param pEndpointCache The endpoint cache.
570 * @param pTaskSeg Task segment to complete.
571 */
572static PPDMACFILETASKSEG pdmacFileCacheTaskComplete(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILETASKSEG pTaskSeg)
573{
574 PPDMACFILETASKSEG pNext = pTaskSeg->pNext;
575
576 uint32_t uOld = ASMAtomicSubU32(&pTaskSeg->pTask->cbTransferLeft, pTaskSeg->cbTransfer);
577 AssertMsg(uOld >= pTaskSeg->cbTransfer, ("New value would overflow\n"));
578 if (!(uOld - pTaskSeg->cbTransfer)
579 && !ASMAtomicXchgBool(&pTaskSeg->pTask->fCompleted, true))
580 {
581 pdmR3AsyncCompletionCompleteTask(&pTaskSeg->pTask->Core);
582
583 if (pTaskSeg->fWrite)
584 {
585 /* Complete a pending flush if all writes have completed */
586 uint32_t cWritesOutstanding = ASMAtomicDecU32(&pEndpointCache->cWritesOutstanding);
587 PPDMASYNCCOMPLETIONTASKFILE pTaskFlush = (PPDMASYNCCOMPLETIONTASKFILE)ASMAtomicXchgPtr((void * volatile *)&pEndpointCache->pTaskFlush, NULL);
588
589 if (!cWritesOutstanding && pTaskFlush)
590 pdmR3AsyncCompletionCompleteTask(&pTaskFlush->Core);
591 }
592 }
593
594 RTMemFree(pTaskSeg);
595
596 return pNext;
597}
598
599/**
600 * Completion callback for I/O tasks.
601 *
602 * @returns nothing.
603 * @param pTask The completed task.
604 * @param pvUser Opaque user data.
605 */
606static void pdmacFileCacheTaskCompleted(PPDMACTASKFILE pTask, void *pvUser)
607{
608 PPDMACFILECACHEENTRY pEntry = (PPDMACFILECACHEENTRY)pvUser;
609 PPDMACFILECACHEGLOBAL pCache = pEntry->pCache;
610 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pEntry->pEndpoint;
611 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
612
613 /* Reference the entry now as we are clearing the I/O in progress flag
614 * which protected the entry until now. */
615 pdmacFileEpCacheEntryRef(pEntry);
616
617 RTSemRWRequestWrite(pEndpoint->DataCache.SemRWEntries, RT_INDEFINITE_WAIT);
618 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
619
620 /* Process the waiting segment list. The data in the entry might have changed in between. */
621 PPDMACFILETASKSEG pCurr = pEntry->pWaitingHead;
622
623 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
624 ("The list tail was not updated correctly\n"));
625 pEntry->pWaitingTail = NULL;
626 pEntry->pWaitingHead = NULL;
627
628 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
629 {
630 if (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DEPRECATED)
631 {
632 AssertMsg(!pCurr, ("The entry is deprecated but has waiting write segments attached\n"));
633
634 RTMemPageFree(pEntry->pbData);
635 pEntry->pbData = pEntry->pbDataReplace;
636 pEntry->pbDataReplace = NULL;
637 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IS_DEPRECATED;
638 }
639 else
640 {
641 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IS_DIRTY;
642
643 while (pCurr)
644 {
645 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
646
647 memcpy(pEntry->pbData + pCurr->uBufOffset, pCurr->pvBuf, pCurr->cbTransfer);
648 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
649
650 pCurr = pdmacFileCacheTaskComplete(pEndpointCache, pCurr);
651 }
652 }
653 }
654 else
655 {
656 AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_READ, ("Invalid transfer type\n"));
657 AssertMsg(!(pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY),("Invalid flags set\n"));
658
659 while (pCurr)
660 {
661 if (pCurr->fWrite)
662 {
663 memcpy(pEntry->pbData + pCurr->uBufOffset, pCurr->pvBuf, pCurr->cbTransfer);
664 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
665 }
666 else
667 memcpy(pCurr->pvBuf, pEntry->pbData + pCurr->uBufOffset, pCurr->cbTransfer);
668
669 pCurr = pdmacFileCacheTaskComplete(pEndpointCache, pCurr);
670 }
671 }
672
673 if (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY)
674 pdmacFileCacheWriteToEndpoint(pEntry);
675
676 RTSemRWReleaseWrite(pEndpoint->DataCache.SemRWEntries);
677
678 /* Dereference so that it isn't protected anymore unless we issued another write for it. */
679 pdmacFileEpCacheEntryRelease(pEntry);
680}
681
682/**
683 * Initializes the I/O cache.
684 *
685 * @returns VBox status code.
686 * @param pClassFile The global class data for file endpoints.
687 * @param pCfgNode CFGM node to query configuration data from.
688 */
689int pdmacFileCacheInit(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile, PCFGMNODE pCfgNode)
690{
691 int rc = VINF_SUCCESS;
692 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
693
694 rc = CFGMR3QueryU32Def(pCfgNode, "CacheSize", &pCache->cbMax, 5 * _1M);
695 AssertLogRelRCReturn(rc, rc);
696
697 pCache->cbCached = 0;
698 LogFlowFunc((": Maximum number of bytes cached %u\n", pCache->cbMax));
699
700 /* Initialize members */
701#ifdef VBOX_WITH_2Q_CACHE
702 pCache->LruRecentlyUsedIn.pHead = NULL;
703 pCache->LruRecentlyUsedIn.pTail = NULL;
704 pCache->LruRecentlyUsedIn.cbCached = 0;
705
706 pCache->LruRecentlyUsedOut.pHead = NULL;
707 pCache->LruRecentlyUsedOut.pTail = NULL;
708 pCache->LruRecentlyUsedOut.cbCached = 0;
709
710 pCache->LruFrequentlyUsed.pHead = NULL;
711 pCache->LruFrequentlyUsed.pTail = NULL;
712 pCache->LruFrequentlyUsed.cbCached = 0;
713
714 pCache->cbRecentlyUsedInMax = (pCache->cbMax / 100) * 25; /* 25% of the buffer size */
715 pCache->cbRecentlyUsedOutMax = (pCache->cbMax / 100) * 50; /* 50% of the buffer size */
716 LogFlowFunc((": cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n", pCache->cbRecentlyUsedInMax, pCache->cbRecentlyUsedOutMax));
717#else
718 pCache->LruRecentlyUsed.pHead = NULL;
719 pCache->LruRecentlyUsed.pTail = NULL;
720 pCache->LruRecentlyUsed.cbCached = 0;
721
722 pCache->LruFrequentlyUsed.pHead = NULL;
723 pCache->LruFrequentlyUsed.pTail = NULL;
724 pCache->LruFrequentlyUsed.cbCached = 0;
725
726 pCache->LruRecentlyGhost.pHead = NULL;
727 pCache->LruRecentlyGhost.pTail = NULL;
728 pCache->LruRecentlyGhost.cbCached = 0;
729
730 pCache->LruFrequentlyGhost.pHead = NULL;
731 pCache->LruFrequentlyGhost.pTail = NULL;
732 pCache->LruFrequentlyGhost.cbCached = 0;
733
734 pCache->uAdaptVal = 0;
735#endif
736
737 STAMR3Register(pClassFile->Core.pVM, &pCache->cbMax,
738 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
739 "/PDM/AsyncCompletion/File/cbMax",
740 STAMUNIT_BYTES,
741 "Maximum cache size");
742 STAMR3Register(pClassFile->Core.pVM, &pCache->cbCached,
743 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
744 "/PDM/AsyncCompletion/File/cbCached",
745 STAMUNIT_BYTES,
746 "Currently used cache");
747#ifdef VBOX_WITH_2Q_CACHE
748 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsedIn.cbCached,
749 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
750 "/PDM/AsyncCompletion/File/cbCachedMruIn",
751 STAMUNIT_BYTES,
752 "Number of bytes cached in MRU list");
753 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsedOut.cbCached,
754 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
755 "/PDM/AsyncCompletion/File/cbCachedMruOut",
756 STAMUNIT_BYTES,
757 "Number of bytes cached in FRU list");
758 STAMR3Register(pClassFile->Core.pVM, &pCache->LruFrequentlyUsed.cbCached,
759 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
760 "/PDM/AsyncCompletion/File/cbCachedFru",
761 STAMUNIT_BYTES,
762 "Number of bytes cached in FRU ghost list");
763#else
764 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsed.cbCached,
765 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
766 "/PDM/AsyncCompletion/File/cbCachedMru",
767 STAMUNIT_BYTES,
768 "Number of bytes cached in Mru list");
769 STAMR3Register(pClassFile->Core.pVM, &pCache->LruFrequentlyUsed.cbCached,
770 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
771 "/PDM/AsyncCompletion/File/cbCachedFru",
772 STAMUNIT_BYTES,
773 "Number of bytes cached in Fru list");
774 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyGhost.cbCached,
775 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
776 "/PDM/AsyncCompletion/File/cbCachedMruGhost",
777 STAMUNIT_BYTES,
778 "Number of bytes cached in Mru ghost list");
779 STAMR3Register(pClassFile->Core.pVM, &pCache->LruFrequentlyGhost.cbCached,
780 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
781 "/PDM/AsyncCompletion/File/cbCachedFruGhost",
782 STAMUNIT_BYTES, "Number of bytes cached in Fru ghost list");
783#endif
784
785#ifdef VBOX_WITH_STATISTICS
786 STAMR3Register(pClassFile->Core.pVM, &pCache->cHits,
787 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
788 "/PDM/AsyncCompletion/File/CacheHits",
789 STAMUNIT_COUNT, "Number of hits in the cache");
790 STAMR3Register(pClassFile->Core.pVM, &pCache->cPartialHits,
791 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
792 "/PDM/AsyncCompletion/File/CachePartialHits",
793 STAMUNIT_COUNT, "Number of partial hits in the cache");
794 STAMR3Register(pClassFile->Core.pVM, &pCache->cMisses,
795 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
796 "/PDM/AsyncCompletion/File/CacheMisses",
797 STAMUNIT_COUNT, "Number of misses when accessing the cache");
798 STAMR3Register(pClassFile->Core.pVM, &pCache->StatRead,
799 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
800 "/PDM/AsyncCompletion/File/CacheRead",
801 STAMUNIT_BYTES, "Number of bytes read from the cache");
802 STAMR3Register(pClassFile->Core.pVM, &pCache->StatWritten,
803 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
804 "/PDM/AsyncCompletion/File/CacheWritten",
805 STAMUNIT_BYTES, "Number of bytes written to the cache");
806 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeGet,
807 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
808 "/PDM/AsyncCompletion/File/CacheTreeGet",
809 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
810 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeInsert,
811 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
812 "/PDM/AsyncCompletion/File/CacheTreeInsert",
813 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
814 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeRemove,
815 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
816 "/PDM/AsyncCompletion/File/CacheTreeRemove",
817 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry from the tree");
818 STAMR3Register(pClassFile->Core.pVM, &pCache->StatBuffersReused,
819 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
820 "/PDM/AsyncCompletion/File/CacheBuffersReused",
821 STAMUNIT_COUNT, "Number of times a buffer could be reused");
822#ifndef VBOX_WITH_2Q_CACHE
823 STAMR3Register(pClassFile->Core.pVM, &pCache->uAdaptVal,
824 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
825 "/PDM/AsyncCompletion/File/CacheAdaptValue",
826 STAMUNIT_COUNT,
827 "Adaption value of the cache");
828#endif
829#endif
830
831 /* Initialize the critical section */
832 rc = RTCritSectInit(&pCache->CritSect);
833
834 if (RT_SUCCESS(rc))
835 LogRel(("AIOMgr: Cache successfully initialised. Cache size is %u bytes\n", pCache->cbMax));
836
837 return rc;
838}
839
840/**
841 * Destroys the cache freeing all data.
842 *
843 * @returns nothing.
844 * @param pClassFile The global class data for file endpoints.
845 */
846void pdmacFileCacheDestroy(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile)
847{
848 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
849
850 /* Make sure no one else uses the cache now */
851 RTCritSectEnter(&pCache->CritSect);
852
853#ifdef VBOX_WITH_2Q_CACHE
854 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
855 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsedIn);
856 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsedOut);
857 pdmacFileCacheDestroyList(&pCache->LruFrequentlyUsed);
858#else
859 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
860 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsed);
861 pdmacFileCacheDestroyList(&pCache->LruFrequentlyUsed);
862 pdmacFileCacheDestroyList(&pCache->LruRecentlyGhost);
863 pdmacFileCacheDestroyList(&pCache->LruFrequentlyGhost);
864#endif
865
866 RTCritSectLeave(&pCache->CritSect);
867
868 RTCritSectDelete(&pCache->CritSect);
869}
870
871/**
872 * Initializes per endpoint cache data
873 * like the AVL tree used to access cached entries.
874 *
875 * @returns VBox status code.
876 * @param pEndpoint The endpoint to init the cache for,
877 * @param pClassFile The global class data for file endpoints.
878 */
879int pdmacFileEpCacheInit(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile)
880{
881 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
882
883 pEndpointCache->pCache = &pClassFile->Cache;
884
885 int rc = RTSemRWCreate(&pEndpointCache->SemRWEntries);
886 if (RT_SUCCESS(rc))
887 {
888 pEndpointCache->pTree = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
889 if (!pEndpointCache->pTree)
890 {
891 rc = VERR_NO_MEMORY;
892 RTSemRWDestroy(pEndpointCache->SemRWEntries);
893 }
894 }
895
896#ifdef VBOX_WITH_STATISTICS
897 if (RT_SUCCESS(rc))
898 {
899 STAMR3RegisterF(pClassFile->Core.pVM, &pEndpointCache->StatWriteDeferred,
900 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
901 STAMUNIT_COUNT, "Number of deferred writes",
902 "/PDM/AsyncCompletion/File/%s/Cache/DeferredWrites", RTPathFilename(pEndpoint->Core.pszUri));
903 }
904#endif
905
906 return rc;
907}
908
909/**
910 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
911 *
912 * @returns IPRT status code.
913 * @param pNode The node to destroy.
914 * @param pvUser Opaque user data.
915 */
916static int pdmacFileEpCacheEntryDestroy(PAVLRFOFFNODECORE pNode, void *pvUser)
917{
918 PPDMACFILECACHEENTRY pEntry = (PPDMACFILECACHEENTRY)pNode;
919 PPDMACFILECACHEGLOBAL pCache = (PPDMACFILECACHEGLOBAL)pvUser;
920 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEntry->pEndpoint->DataCache;
921
922 while (pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY))
923 {
924 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
925 RTThreadSleep(250);
926 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
927 }
928
929 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)),
930 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
931
932 pdmacFileCacheEntryRemoveFromList(pEntry);
933 pCache->cbCached -= pEntry->cbData;
934
935 RTMemPageFree(pEntry->pbData);
936 RTMemFree(pEntry);
937
938 return VINF_SUCCESS;
939}
940
941/**
942 * Destroys all cache resources used by the given endpoint.
943 *
944 * @returns nothing.
945 * @param pEndpoint The endpoint to destroy.
946 */
947void pdmacFileEpCacheDestroy(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
948{
949 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
950 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
951
952 /* Make sure nobody is accessing the cache while we delete the tree. */
953 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
954 RTCritSectEnter(&pCache->CritSect);
955 RTAvlrFileOffsetDestroy(pEndpointCache->pTree, pdmacFileEpCacheEntryDestroy, pCache);
956 RTCritSectLeave(&pCache->CritSect);
957 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
958
959 RTSemRWDestroy(pEndpointCache->SemRWEntries);
960
961#ifdef VBOX_WITH_STATISTICS
962 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
963
964 STAMR3Deregister(pEpClassFile->Core.pVM, &pEndpointCache->StatWriteDeferred);
965#endif
966}
967
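/**
 * Looks up the cache entry covering the given offset for an endpoint.
 *
 * @returns Pointer to the cache entry or NULL if no entry covers the offset.
 *          The returned entry is referenced; the caller must release it with
 *          pdmacFileEpCacheEntryRelease().
 * @param   pEndpointCache    The endpoint cache to search in.
 * @param   off               The offset to look up.
 */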
968static PPDMACFILECACHEENTRY pdmacFileEpCacheGetCacheEntryByOffset(PPDMACFILEENDPOINTCACHE pEndpointCache, RTFOFF off)
969{
970 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
971 PPDMACFILECACHEENTRY pEntry = NULL;
972
973 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
974
975 RTSemRWRequestRead(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
976 pEntry = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetRangeGet(pEndpointCache->pTree, off);
977 if (pEntry)
978 pdmacFileEpCacheEntryRef(pEntry);
979 RTSemRWReleaseRead(pEndpointCache->SemRWEntries);
980
981 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
982
983 return pEntry;
984}
985
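/**
 * Looks up the cache entry which fits best for the given offset, searching
 * upwards (i.e. the nearest entry starting at or above the offset).
 *
 * @returns Pointer to the best fit cache entry or NULL if none was found.
 *          The returned entry is referenced; the caller must release it.
 * @param   pEndpointCache    The endpoint cache to search in.
 * @param   off               The offset to search from.
 */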
986static PPDMACFILECACHEENTRY pdmacFileEpCacheGetCacheBestFitEntryByOffset(PPDMACFILEENDPOINTCACHE pEndpointCache, RTFOFF off)
987{
988 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
989 PPDMACFILECACHEENTRY pEntry = NULL;
990
991 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
992
993 RTSemRWRequestRead(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
994 pEntry = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetGetBestFit(pEndpointCache->pTree, off, true /*fAbove*/);
995 if (pEntry)
996 pdmacFileEpCacheEntryRef(pEntry);
997 RTSemRWReleaseRead(pEndpointCache->SemRWEntries);
998
999 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1000
1001 return pEntry;
1002}
1003
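/**
 * Inserts a new cache entry into the endpoint's AVL tree, taking the entry
 * semaphore in exclusive mode.
 *
 * @returns nothing.
 * @param   pEndpointCache    The endpoint cache the entry belongs to.
 * @param   pEntry            The entry to insert.
 */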
1004static void pdmacFileEpCacheInsertEntry(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILECACHEENTRY pEntry)
1005{
1006 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1007
1008 STAM_PROFILE_ADV_START(&pCache->StatTreeInsert, Cache);
1009 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1010 bool fInserted = RTAvlrFileOffsetInsert(pEndpointCache->pTree, &pEntry->Core);
1011 AssertMsg(fInserted, ("Node was not inserted into tree\n"));
1012 STAM_PROFILE_ADV_STOP(&pCache->StatTreeInsert, Cache);
1013 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1014}
1015
1016/**
1017 * Allocates and initializes a new entry for the cache.
1018 * The entry has a reference count of 1.
1019 *
1020 * @returns Pointer to the new cache entry or NULL if out of memory.
1021 * @param pCache The cache the entry belongs to.
1022 * @param pEndpoint The endpoint the entry holds data for.
1023 * @param off Start offset.
1024 * @param cbData Size of the cache entry.
1025 * @param pbBuffer Pointer to the buffer to use.
1026 * NULL if a new buffer should be allocated.
1027 * The buffer needs to have the same size as the entry.
1028 */
1029static PPDMACFILECACHEENTRY pdmacFileCacheEntryAlloc(PPDMACFILECACHEGLOBAL pCache,
1030 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
1031 RTFOFF off, size_t cbData, uint8_t *pbBuffer)
1032{
1033 PPDMACFILECACHEENTRY pEntryNew = (PPDMACFILECACHEENTRY)RTMemAllocZ(sizeof(PDMACFILECACHEENTRY));
1034
1035 if (RT_UNLIKELY(!pEntryNew))
1036 return NULL;
1037
1038 pEntryNew->Core.Key = off;
1039 pEntryNew->Core.KeyLast = off + cbData - 1;
1040 pEntryNew->pEndpoint = pEndpoint;
1041 pEntryNew->pCache = pCache;
1042 pEntryNew->fFlags = 0;
1043 pEntryNew->cRefs = 1; /* We are using it now. */
1044 pEntryNew->pList = NULL;
1045 pEntryNew->cbData = cbData;
1046 pEntryNew->pWaitingHead = NULL;
1047 pEntryNew->pWaitingTail = NULL;
1048 pEntryNew->pbDataReplace = NULL;
1049 if (pbBuffer)
1050 pEntryNew->pbData = pbBuffer;
1051 else
1052 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1053
1054 if (RT_UNLIKELY(!pEntryNew->pbData))
1055 {
1056 RTMemFree(pEntryNew);
1057 return NULL;
1058 }
1059
1060 return pEntryNew;
1061}
1062
1063/**
1064 * Adds a segment to the waiting list for a cache entry
1065 * which is currently in progress.
1066 *
1067 * @returns nothing.
1068 * @param pEntry The cache entry to add the segment to.
1069 * @param pSeg The segment to add.
1070 */
1071DECLINLINE(void) pdmacFileEpCacheEntryAddWaitingSegment(PPDMACFILECACHEENTRY pEntry, PPDMACFILETASKSEG pSeg)
1072{
1073 pSeg->pNext = NULL;
1074
1075 if (pEntry->pWaitingHead)
1076 {
1077 AssertPtr(pEntry->pWaitingTail);
1078
1079 pEntry->pWaitingTail->pNext = pSeg;
1080 pEntry->pWaitingTail = pSeg;
1081 }
1082 else
1083 {
1084 Assert(!pEntry->pWaitingTail);
1085
1086 pEntry->pWaitingHead = pSeg;
1087 pEntry->pWaitingTail = pSeg;
1088 }
1089}
1090
1091/**
1092 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1093 * in exclusive mode.
1094 *
1095 * @returns true if the flag in fSet is set and the one in fClear is clear.
1096 * false otherwise.
1097 * The R/W semaphore is only held if true is returned.
1098 *
1099 * @param pEndpointCache The endpoint cache instance data.
1100 * @param pEntry The entry to check the flags for.
1101 * @param fSet The flag which is tested to be set.
1102 * @param fClear The flag which is tested to be clear.
1103 */
1104DECLINLINE(bool) pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(PPDMACFILEENDPOINTCACHE pEndpointCache,
1105 PPDMACFILECACHEENTRY pEntry,
1106 uint32_t fSet, uint32_t fClear)
1107{
1108 bool fPassed = ((pEntry->fFlags & fSet) && !(pEntry->fFlags & fClear));
1109
1110 if (fPassed)
1111 {
1112 /* Acquire the lock and check again because the completion callback might have raced us. */
1113 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1114
1115 fPassed = ((pEntry->fFlags & fSet) && !(pEntry->fFlags & fClear));
1116
1117 /* Drop the lock if we didn't pass the test. */
1118 if (!fPassed)
1119 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1120 }
1121
1122 return fPassed;
1123}
1124
1125/**
1126 * Advances the current segment buffer by the number of bytes transferred
1127 * or gets the next segment.
1128 */
1129#define ADVANCE_SEGMENT_BUFFER(BytesTransferred) \
1130 do \
1131 { \
1132 cbSegLeft -= BytesTransferred; \
1133 if (!cbSegLeft) \
1134 { \
1135 iSegCurr++; \
1136 cbSegLeft = paSegments[iSegCurr].cbSeg; \
1137 pbSegBuf = (uint8_t *)paSegments[iSegCurr].pvSeg; \
1138 } \
1139 else \
1140 pbSegBuf += BytesTransferred; \
1141 } \
1142 while (0)
1143
1144/**
1145 * Reads the specified data from the endpoint using the cache if possible.
1146 *
1147 * @returns VBox status code.
1148 * @param pEndpoint The endpoint to read from.
1149 * @param pTask The task structure used as identifier for this request.
1150 * @param off The offset to start reading from.
1151 * @param paSegments Pointer to the array holding the destination buffers.
1152 * @param cSegments Number of segments in the array.
1153 * @param cbRead Number of bytes to read.
1154 */
1155int pdmacFileEpCacheRead(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
1156 RTFOFF off, PCPDMDATASEG paSegments, size_t cSegments,
1157 size_t cbRead)
1158{
1159 int rc = VINF_SUCCESS;
1160 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1161 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1162 PPDMACFILECACHEENTRY pEntry;
1163
1164 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p off=%RTfoff paSegments=%#p cSegments=%u cbRead=%u\n",
1165 pEndpoint, pEndpoint->Core.pszUri, pTask, off, paSegments, cSegments, cbRead));
1166
1167 pTask->cbTransferLeft = cbRead;
1168 /* Set to completed to make sure that the task is valid while we access it. */
1169 ASMAtomicWriteBool(&pTask->fCompleted, true);
1170
1171 int iSegCurr = 0;
1172 uint8_t *pbSegBuf = (uint8_t *)paSegments[iSegCurr].pvSeg;
1173 size_t cbSegLeft = paSegments[iSegCurr].cbSeg;
1174
1175 while (cbRead)
1176 {
1177 size_t cbToRead;
1178
1179 pEntry = pdmacFileEpCacheGetCacheEntryByOffset(pEndpointCache, off);
1180
1181 /*
1182 * If there is no entry we try to create a new one, evicting unused pages
1183 * if the cache is full. If this is not possible we will pass the request through
1184 * and skip the caching (all entries may still be in progress so they can't
1185 * be evicted).
1186 * If we have an entry it can be in one of the LRU lists where the entry
1187 * contains data (recently used or frequently used LRU) so we can just read
1188 * the data we need and put the entry at the head of the frequently used LRU list.
1189 * In case the entry is in one of the ghost lists it doesn't contain any data.
1190 * We have to fetch it again, evicting pages from either T1 or T2 to make room.
1191 */
1192 if (pEntry)
1193 {
1194 RTFOFF OffDiff = off - pEntry->Core.Key;
1195
1196 AssertMsg(off >= pEntry->Core.Key,
1197 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1198 off, pEntry->Core.Key));
1199
1200 AssertPtr(pEntry->pList);
1201
1202 cbToRead = RT_MIN(pEntry->cbData - OffDiff, cbRead);
1203 cbRead -= cbToRead;
1204
1205 if (!cbRead)
1206 STAM_COUNTER_INC(&pCache->cHits);
1207 else
1208 STAM_COUNTER_INC(&pCache->cPartialHits);
1209
1210 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
1211
1212 /* Ghost lists contain no data. */
1213#ifdef VBOX_WITH_2Q_CACHE
1214 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1215 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1216 {
1217#else
1218 if ( (pEntry->pList == &pCache->LruRecentlyUsed)
1219 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1220 {
1221#endif
1222 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1223 PDMACFILECACHE_ENTRY_IS_DEPRECATED,
1224 0))
1225 {
1226 /* Entry is deprecated. Read data from the new buffer. */
1227 while (cbToRead)
1228 {
1229 size_t cbCopy = RT_MIN(cbSegLeft, cbToRead);
1230
1231 memcpy(pbSegBuf, pEntry->pbDataReplace + OffDiff, cbCopy);
1232
1233 ADVANCE_SEGMENT_BUFFER(cbCopy);
1234
1235 cbToRead -= cbCopy;
1236 off += cbCopy;
1237 OffDiff += cbCopy;
1238 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1239 }
1240 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1241 }
1242 else
1243 {
1244 if (pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1245 PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1246 PDMACFILECACHE_ENTRY_IS_DIRTY))
1247 {
1248 /* Entry hasn't completed yet. Append to the waiting list. */
1249 while (cbToRead)
1250 {
1251 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1252
1253 pSeg->pTask = pTask;
1254 pSeg->uBufOffset = OffDiff;
1255 pSeg->cbTransfer = RT_MIN(cbToRead, cbSegLeft);
1256 pSeg->pvBuf = pbSegBuf;
1257 pSeg->fWrite = false;
1258
1259 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1260
1261 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1262
1263 off += pSeg->cbTransfer;
1264 cbToRead -= pSeg->cbTransfer;
1265 OffDiff += pSeg->cbTransfer;
1266 }
1267 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1268 }
1269 else
1270 {
1271 /* Read as much as we can from the entry. */
1272 while (cbToRead)
1273 {
1274 size_t cbCopy = RT_MIN(cbSegLeft, cbToRead);
1275
1276 memcpy(pbSegBuf, pEntry->pbData + OffDiff, cbCopy);
1277
1278 ADVANCE_SEGMENT_BUFFER(cbCopy);
1279
1280 cbToRead -= cbCopy;
1281 off += cbCopy;
1282 OffDiff += cbCopy;
1283 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1284 }
1285 }
1286 }
1287
1288 /* Move this entry to the top position */
1289#ifdef VBOX_WITH_2Q_CACHE
1290 if (pEntry->pList == &pCache->LruFrequentlyUsed)
1291 {
1292 RTCritSectEnter(&pCache->CritSect);
1293 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1294 RTCritSectLeave(&pCache->CritSect);
1295 }
1296#else
1297 RTCritSectEnter(&pCache->CritSect);
1298 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1299 RTCritSectLeave(&pCache->CritSect);
1300#endif
1301 }
1302 else
1303 {
1304 uint8_t *pbBuffer = NULL;
1305
1306 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
1307
1308#ifdef VBOX_WITH_2Q_CACHE
1309 RTCritSectEnter(&pCache->CritSect);
1310 pdmacFileCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
1311 pdmacFileCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
1312
1313 /* Move the entry to Am and fetch it to the cache. */
1314 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1315 RTCritSectLeave(&pCache->CritSect);
1316#else
1317 RTCritSectEnter(&pCache->CritSect);
1318 pdmacFileCacheUpdate(pCache, pEntry);
1319 pdmacFileCacheReplace(pCache, pEntry->cbData, pEntry->pList, true, &pbBuffer);
1320
1321 /* Move the entry to T2 and fetch it to the cache. */
1322 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1323 RTCritSectLeave(&pCache->CritSect);
1324#endif
1325
1326 if (pbBuffer)
1327 pEntry->pbData = pbBuffer;
1328 else
1329 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1330 AssertPtr(pEntry->pbData);
1331
1332 while (cbToRead)
1333 {
1334 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1335
1336 AssertMsg(off >= pEntry->Core.Key,
1337 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1338 off, pEntry->Core.Key));
1339
1340 pSeg->pTask = pTask;
1341 pSeg->uBufOffset = OffDiff;
1342 pSeg->cbTransfer = RT_MIN(cbToRead, cbSegLeft);
1343 pSeg->pvBuf = pbSegBuf;
1344
1345 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1346
1347 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1348
1349 off += pSeg->cbTransfer;
1350 OffDiff += pSeg->cbTransfer;
1351 cbToRead -= pSeg->cbTransfer;
1352 }
1353
1354 pdmacFileCacheReadFromEndpoint(pEntry);
1355 }
1356 pdmacFileEpCacheEntryRelease(pEntry);
1357 }
1358 else
1359 {
1360 /* No entry found for this offset. Get best fit entry and fetch the data to the cache. */
1361 size_t cbToReadAligned;
1362 PPDMACFILECACHEENTRY pEntryBestFit = pdmacFileEpCacheGetCacheBestFitEntryByOffset(pEndpointCache, off);
1363
1364 LogFlow(("%sbest fit entry for off=%RTfoff (BestFit=%RTfoff BestFitEnd=%RTfoff BestFitSize=%u)\n",
1365 pEntryBestFit ? "" : "No ",
1366 off,
1367 pEntryBestFit ? pEntryBestFit->Core.Key : 0,
1368 pEntryBestFit ? pEntryBestFit->Core.KeyLast : 0,
1369 pEntryBestFit ? pEntryBestFit->cbData : 0));
1370
1371 if ( pEntryBestFit
1372 && off + (RTFOFF)cbRead > pEntryBestFit->Core.Key)
1373 {
1374 cbToRead = pEntryBestFit->Core.Key - off;
1375 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1376 cbToReadAligned = cbToRead;
1377 }
1378 else
1379 {
1380 /*
1381 * Align the size to a 4KB boundary.
1382 * Memory size is aligned to a page boundary
1383 * and memory is wasted if the size is rather small
1384 * (for example reads with a size of 512 bytes).
1385 */
1386 cbToRead = cbRead;
1387 cbToReadAligned = RT_ALIGN_Z(cbRead, PAGE_SIZE);
1388
1389 /* Clip read to file size */
1390 cbToReadAligned = RT_MIN(pEndpoint->cbFile - off, cbToReadAligned);
1391 if (pEntryBestFit)
1392 {
1393 Assert(pEntryBestFit->Core.Key >= off);
1394 cbToReadAligned = RT_MIN(cbToReadAligned, (uint64_t)pEntryBestFit->Core.Key - off);
1395 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1396 }
1397 }
1398
1399 cbRead -= cbToRead;
1400
1401 if (!cbRead)
1402 STAM_COUNTER_INC(&pCache->cMisses);
1403 else
1404 STAM_COUNTER_INC(&pCache->cPartialHits);
1405
1406 uint8_t *pbBuffer = NULL;
1407
1408#ifdef VBOX_WITH_2Q_CACHE
1409 RTCritSectEnter(&pCache->CritSect);
1410 bool fEnough = pdmacFileCacheReclaim(pCache, cbToReadAligned, true, &pbBuffer);
1411 RTCritSectLeave(&pCache->CritSect);
1412
1413 if (fEnough)
1414 {
1415 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbToReadAligned));
1416#else
1417 RTCritSectEnter(&pCache->CritSect);
1418 size_t cbRemoved = pdmacFileCacheEvict(pCache, cbToReadAligned, true, &pbBuffer);
1419 RTCritSectLeave(&pCache->CritSect);
1420
1421 if (cbRemoved >= cbToReadAligned)
1422 {
1423 LogFlow(("Evicted %u bytes (%u requested). Creating new cache entry\n", cbRemoved, cbToReadAligned));
1424#endif
1425 PPDMACFILECACHEENTRY pEntryNew = pdmacFileCacheEntryAlloc(pCache, pEndpoint, off, cbToReadAligned, pbBuffer);
1426 AssertPtr(pEntryNew);
1427
1428 RTCritSectEnter(&pCache->CritSect);
1429#ifdef VBOX_WITH_2Q_CACHE
1430 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1431#else
1432 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsed, pEntryNew);
1433#endif
1434 pCache->cbCached += cbToReadAligned;
1435 RTCritSectLeave(&pCache->CritSect);
1436
1437 pdmacFileEpCacheInsertEntry(pEndpointCache, pEntryNew);
1438 uint32_t uBufOffset = 0;
1439
1440 while (cbToRead)
1441 {
1442 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1443
1444 pSeg->pTask = pTask;
1445 pSeg->uBufOffset = uBufOffset;
1446 pSeg->cbTransfer = RT_MIN(cbToRead, cbSegLeft);
1447 pSeg->pvBuf = pbSegBuf;
1448
1449 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1450
1451 pdmacFileEpCacheEntryAddWaitingSegment(pEntryNew, pSeg);
1452
1453 off += pSeg->cbTransfer;
1454 cbToRead -= pSeg->cbTransfer;
1455 uBufOffset += pSeg->cbTransfer;
1456 }
1457
1458 pdmacFileCacheReadFromEndpoint(pEntryNew);
1459 pdmacFileEpCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
1460 }
1461 else
1462 {
1463 /*
1464 * There is not enough free space in the cache.
1465 * Pass the request directly to the I/O manager.
1466 */
1467 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
1468
1469 while (cbToRead)
1470 {
1471 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEndpoint);
1472 AssertPtr(pIoTask);
1473
1474 pIoTask->pEndpoint = pEndpoint;
1475 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_READ;
1476 pIoTask->Off = off;
1477 pIoTask->DataSeg.cbSeg = RT_MIN(cbToRead, cbSegLeft);
1478 pIoTask->DataSeg.pvSeg = pbSegBuf;
1479 pIoTask->pvUser = pTask;
1480 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
1481
1482 off += pIoTask->DataSeg.cbSeg;
1483 cbToRead -= pIoTask->DataSeg.cbSeg;
1484
1485 ADVANCE_SEGMENT_BUFFER(pIoTask->DataSeg.cbSeg);
1486
1487 /* Send it off to the I/O manager. */
1488 pdmacFileEpAddTask(pEndpoint, pIoTask);
1489 }
1490 }
1491 }
1492 }
1493
1494 ASMAtomicWriteBool(&pTask->fCompleted, false);
1495
1496 if (ASMAtomicReadS32(&pTask->cbTransferLeft) == 0
1497 && !ASMAtomicXchgBool(&pTask->fCompleted, true))
1498 pdmR3AsyncCompletionCompleteTask(&pTask->Core);
1499
1500 return rc;
1501}
1502
1503/**
1504 * Writes the given data to the endpoint using the cache if possible.
1505 *
1506 * @returns VBox status code.
1507 * @param pEndpoint The endpoint to write to.
1508 * @param pTask The task structure used as identifier for this request.
1509 * @param off The offset to start writing to
1510 * @param paSegments Pointer to the array holding the source buffers.
1511 * @param cSegments Number of segments in the array.
1512 * @param cbWrite Number of bytes to write.
1513 */
1514int pdmacFileEpCacheWrite(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
1515 RTFOFF off, PCPDMDATASEG paSegments, size_t cSegments,
1516 size_t cbWrite)
1517{
1518 int rc = VINF_SUCCESS;
1519 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1520 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1521 PPDMACFILECACHEENTRY pEntry;
1522
1523 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p off=%RTfoff paSegments=%#p cSegments=%u cbWrite=%u\n",
1524 pEndpoint, pEndpoint->Core.pszUri, pTask, off, paSegments, cSegments, cbWrite));
1525
1526 pTask->cbTransferLeft = cbWrite;
1527 /* Set to completed to make sure that the task is valid while we access it. */
1528 ASMAtomicWriteBool(&pTask->fCompleted, true);
1529 ASMAtomicIncU32(&pEndpointCache->cWritesOutstanding);
1530
1531 int iSegCurr = 0;
1532 uint8_t *pbSegBuf = (uint8_t *)paSegments[iSegCurr].pvSeg;
1533 size_t cbSegLeft = paSegments[iSegCurr].cbSeg;
1534
1535 while (cbWrite)
1536 {
1537 size_t cbToWrite;
1538
1539 pEntry = pdmacFileEpCacheGetCacheEntryByOffset(pEndpointCache, off);
1540
1541 if (pEntry)
1542 {
1543 /* Write the data into the entry and mark it as dirty */
1544 AssertPtr(pEntry->pList);
1545
1546 RTFOFF OffDiff = off - pEntry->Core.Key;
1547
1548 AssertMsg(off >= pEntry->Core.Key,
1549 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1550 off, pEntry->Core.Key));
1551
1552 cbToWrite = RT_MIN(pEntry->cbData - OffDiff, cbWrite);
1553 cbWrite -= cbToWrite;
1554
1555 if (!cbWrite)
1556 STAM_COUNTER_INC(&pCache->cHits);
1557 else
1558 STAM_COUNTER_INC(&pCache->cPartialHits);
1559
1560 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
1561
1562 /* Ghost lists contain no data. */
1563#ifdef VBOX_WITH_2Q_CACHE
1564 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1565 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1566#else
1567 if ( (pEntry->pList == &pCache->LruRecentlyUsed)
1568 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1569#endif
1570 {
1571 /* Check if the buffer is deprecated. */
1572 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1573 PDMACFILECACHE_ENTRY_IS_DEPRECATED,
1574 0))
1575 {
1576 AssertMsg(pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1577 ("Entry is deprecated but not in progress\n"));
1578 AssertPtr(pEntry->pbDataReplace);
1579
1580 LogFlow(("Writing to deprecated buffer of entry %#p\n", pEntry));
1581
1582 /* Update the data from the write. */
1583 while (cbToWrite)
1584 {
1585 size_t cbCopy = RT_MIN(cbSegLeft, cbToWrite);
1586
1587 memcpy(pEntry->pbDataReplace + OffDiff, pbSegBuf, cbCopy);
1588
1589 ADVANCE_SEGMENT_BUFFER(cbCopy);
1590
1591 cbToWrite -= cbCopy;
1592 off += cbCopy;
1593 OffDiff += cbCopy;
1594 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1595 }
1596 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1597 }
1598 else /* Deprecated flag not set */
1599 {
1600 /* If the entry is dirty it must also be in progress now and we have to defer updating it again. */
1601 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1602 PDMACFILECACHE_ENTRY_IS_DIRTY,
1603 0))
1604 {
1605 AssertMsg(pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1606 ("Entry is dirty but not in progress\n"));
1607 Assert(!pEntry->pbDataReplace);
1608
1609 /* Deprecate the current buffer. */
1610 if (!pEntry->pWaitingHead)
1611 pEntry->pbDataReplace = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1612
1613 /* If we are out of memory or have waiting segments
1614 * defer the write. */
1615 if (!pEntry->pbDataReplace || pEntry->pWaitingHead)
1616 {
1617 /* The data isn't written to the file yet */
1618 while (cbToWrite)
1619 {
1620 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1621
1622 pSeg->pTask = pTask;
1623 pSeg->uBufOffset = OffDiff;
1624 pSeg->cbTransfer = RT_MIN(cbToWrite, cbSegLeft);
1625 pSeg->pvBuf = pbSegBuf;
1626 pSeg->fWrite = true;
1627
1628 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1629
1630 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1631
1632 off += pSeg->cbTransfer;
1633 OffDiff += pSeg->cbTransfer;
1634 cbToWrite -= pSeg->cbTransfer;
1635 }
1636 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1637 }
1638 else /* Deprecate buffer */
1639 {
1640 LogFlow(("Deprecating buffer for entry %#p\n", pEntry));
1641 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DEPRECATED;
1642
1643#if 1
1644 /* Copy the data before the update. */
1645 if (OffDiff)
1646 memcpy(pEntry->pbDataReplace, pEntry->pbData, OffDiff);
1647
1648 /* Copy data behind the update. */
1649 if ((pEntry->cbData - OffDiff - cbToWrite) > 0)
1650 memcpy(pEntry->pbDataReplace + OffDiff + cbToWrite,
1651 pEntry->pbData + OffDiff + cbToWrite,
1652 (pEntry->cbData - OffDiff - cbToWrite));
1653#else
1654 /* A safer method but probably slower. */
1655 memcpy(pEntry->pbDataReplace, pEntry->pbData, pEntry->cbData);
1656#endif
1657
1658 /* Update the data from the write. */
1659 while (cbToWrite)
1660 {
1661 size_t cbCopy = RT_MIN(cbSegLeft, cbToWrite);
1662
1663 memcpy(pEntry->pbDataReplace + OffDiff, pbSegBuf, cbCopy);
1664
1665 ADVANCE_SEGMENT_BUFFER(cbCopy);
1666
1667 cbToWrite -= cbCopy;
1668 off += cbCopy;
1669 OffDiff += cbCopy;
1670 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1671 }
1672
1673 /* We are done here. A new write is initiated when the current request completes. */
1674 }
1675
1676 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1677 }
1678 else /* Dirty bit not set */
1679 {
1680 /*
1681 * Check if a read is in progress for this entry.
1682 * We have to defer processing in that case.
1683 */
1684 if (pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1685 PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1686 0))
1687 {
1688 while (cbToWrite)
1689 {
1690 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1691
1692 pSeg->pTask = pTask;
1693 pSeg->uBufOffset = OffDiff;
1694 pSeg->cbTransfer = RT_MIN(cbToWrite, cbSegLeft);
1695 pSeg->pvBuf = pbSegBuf;
1696 pSeg->fWrite = true;
1697
1698 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1699
1700 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1701
1702 off += pSeg->cbTransfer;
1703 OffDiff += pSeg->cbTransfer;
1704 cbToWrite -= pSeg->cbTransfer;
1705 }
1706 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1707 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1708 }
1709 else /* I/O in progress flag not set */
1710 {
1711 /* Write as much as we can into the entry and update the file. */
1712 while (cbToWrite)
1713 {
1714 size_t cbCopy = RT_MIN(cbSegLeft, cbToWrite);
1715
1716 memcpy(pEntry->pbData + OffDiff, pbSegBuf, cbCopy);
1717
1718 ADVANCE_SEGMENT_BUFFER(cbCopy);
1719
1720 cbToWrite -= cbCopy;
1721 off += cbCopy;
1722 OffDiff += cbCopy;
1723 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1724 }
1725
1726 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
1727 pdmacFileCacheWriteToEndpoint(pEntry);
1728 }
1729 } /* Dirty bit not set */
1730
1731 /* Move this entry to the top position */
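 /* With the 2Q policy only entries already on the frequently used list (Am)
  * are moved back to its head; entries on the recently used list keep their
  * position. The ARC variant always promotes the entry. */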
1732#ifdef VBOX_WITH_2Q_CACHE
1733 if (pEntry->pList == &pCache->LruFrequentlyUsed)
1734 {
1735 RTCritSectEnter(&pCache->CritSect);
1736 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1737 RTCritSectLeave(&pCache->CritSect);
1738 }
1739#else
1740 RTCritSectEnter(&pCache->CritSect);
1741 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1742 RTCritSectLeave(&pCache->CritSect);
1743#endif
1744 } /* Deprecated flag not set */
1745 }
1746 else /* Entry is on the ghost list */
1747 {
1748 uint8_t *pbBuffer = NULL;
1749
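 /* Ghost entries carry no data. Make room in the cache, move the entry to
  * the frequently used list and give it a buffer again; the old content is
  * read back from the file first and the queued write segments are applied
  * when that read completes. */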
1750#ifdef VBOX_WITH_2Q_CACHE
1751 RTCritSectEnter(&pCache->CritSect);
1752 pdmacFileCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
1753 pdmacFileCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
1754
1755 /* Move the entry to Am and fetch it to the cache. */
1756 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1757 RTCritSectLeave(&pCache->CritSect);
1758#else
1759 RTCritSectEnter(&pCache->CritSect);
1760 pdmacFileCacheUpdate(pCache, pEntry);
1761 pdmacFileCacheReplace(pCache, pEntry->cbData, pEntry->pList, true, &pbBuffer);
1762
1763 /* Move the entry to T2 and fetch it to the cache. */
1764 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1765 RTCritSectLeave(&pCache->CritSect);
1766#endif
1767
1768 if (pbBuffer)
1769 pEntry->pbData = pbBuffer;
1770 else
1771 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1772 AssertPtr(pEntry->pbData);
1773
1774 while (cbToWrite)
1775 {
1776 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1777
1778 AssertMsg(off >= pEntry->Core.Key,
1779 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1780 off, pEntry->Core.Key));
1781
1782 pSeg->pTask = pTask;
1783 pSeg->uBufOffset = OffDiff;
1784 pSeg->cbTransfer = RT_MIN(cbToWrite, cbSegLeft);
1785 pSeg->pvBuf = pbSegBuf;
1786 pSeg->fWrite = true;
1787
1788 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1789
1790 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1791
1792 off += pSeg->cbTransfer;
1793 OffDiff += pSeg->cbTransfer;
1794 cbToWrite -= pSeg->cbTransfer;
1795 }
1796
1797 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1798 pdmacFileCacheReadFromEndpoint(pEntry);
1799 }
1800
1801 /* Release the reference. If it is still needed, the I/O in progress flag should protect it now. */
1802 pdmacFileEpCacheEntryRelease(pEntry);
1803 }
1804 else /* No entry found */
1805 {
1806 /*
1807 * No entry found. Try to create a new cache entry to store the data in,
1808 * and write directly to the file if that fails.
1809 */
1810 PPDMACFILECACHEENTRY pEntryBestFit = pdmacFileEpCacheGetCacheBestFitEntryByOffset(pEndpointCache, off);
1811
1812 LogFlow(("%sest fit entry for off=%RTfoff (BestFit=%RTfoff BestFitEnd=%RTfoff BestFitSize=%u)\n",
1813 pEntryBestFit ? "B" : "No b",
1814 off,
1815 pEntryBestFit ? pEntryBestFit->Core.Key : 0,
1816 pEntryBestFit ? pEntryBestFit->Core.KeyLast : 0,
1817 pEntryBestFit ? pEntryBestFit->cbData : 0));
1818
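 /* If another entry starts inside the write range, clip this chunk so the
  * new entry ends right in front of it; cache entries must never overlap. */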
1819 if (pEntryBestFit && ((off + (RTFOFF)cbWrite) > pEntryBestFit->Core.Key))
1820 {
1821 cbToWrite = pEntryBestFit->Core.Key - off;
1822 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1823 }
1824 else
1825 {
1826 if (pEntryBestFit)
1827 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1828
1829 cbToWrite = cbWrite;
1830 }
1831
1832 cbWrite -= cbToWrite;
1833
1834 STAM_COUNTER_INC(&pCache->cMisses);
1835 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
1836
1837 uint8_t *pbBuffer = NULL;
1838
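 /* Try to free enough cache space for this chunk. If that is not possible
  * the remainder of the request bypasses the cache and goes straight to the
  * I/O manager. */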
1839#ifdef VBOX_WITH_2Q_CACHE
1840 RTCritSectEnter(&pCache->CritSect);
1841 bool fEnough = pdmacFileCacheReclaim(pCache, cbToWrite, true, &pbBuffer);
1842 RTCritSectLeave(&pCache->CritSect);
1843
1844 if (fEnough)
1845 {
1846 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbToWrite));
1847#else
1848 RTCritSectEnter(&pCache->CritSect);
1849 size_t cbRemoved = pdmacFileCacheEvict(pCache, cbToWrite, true, &pbBuffer);
1850 RTCritSectLeave(&pCache->CritSect);
1851
1852 if (cbRemoved >= cbToWrite)
1853 {
1854 LogFlow(("Evicted %u bytes (%u requested). Creating new cache entry\n", cbRemoved, cbToWrite));
1855
1856#endif
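 /* Allocate a new entry (reusing a recycled buffer if eviction produced
  * one), link it into the recently used list and the endpoint's tree, copy
  * the data from the request and kick off the write to the file. */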
1857 uint8_t *pbBuf;
1858 PPDMACFILECACHEENTRY pEntryNew;
1859
1860 pEntryNew = pdmacFileCacheEntryAlloc(pCache, pEndpoint, off, cbToWrite, pbBuffer);
1861 AssertPtr(pEntryNew);
1862
1863 RTCritSectEnter(&pCache->CritSect);
1864#ifdef VBOX_WITH_2Q_CACHE
1865 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1866#else
1867 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsed, pEntryNew);
1868#endif
1869 pCache->cbCached += cbToWrite;
1870 RTCritSectLeave(&pCache->CritSect);
1871
1872 pdmacFileEpCacheInsertEntry(pEndpointCache, pEntryNew);
1873
1874 off += cbToWrite;
1875 pbBuf = pEntryNew->pbData;
1876
1877 while (cbToWrite)
1878 {
1879 size_t cbCopy = RT_MIN(cbSegLeft, cbToWrite);
1880
1881 memcpy(pbBuf, pbSegBuf, cbCopy);
1882
1883 ADVANCE_SEGMENT_BUFFER(cbCopy);
1884
1885 cbToWrite -= cbCopy;
1886 pbBuf += cbCopy;
1887 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1888 }
1889
1890 pEntryNew->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
1891 pdmacFileCacheWriteToEndpoint(pEntryNew);
1892 pdmacFileEpCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
1893 }
1894 else
1895 {
1896 /*
1897 * There is not enough free space in the cache.
1898 * Pass the request directly to the I/O manager.
1899 */
1900 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
1901
1902 while (cbToWrite)
1903 {
1904 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEndpoint);
1905 AssertPtr(pIoTask);
1906
1907 pIoTask->pEndpoint = pEndpoint;
1908 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_WRITE;
1909 pIoTask->Off = off;
1910 pIoTask->DataSeg.cbSeg = RT_MIN(cbToWrite, cbSegLeft);
1911 pIoTask->DataSeg.pvSeg = pbSegBuf;
1912 pIoTask->pvUser = pTask;
1913 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
1914
1915 off += pIoTask->DataSeg.cbSeg;
1916 cbToWrite -= pIoTask->DataSeg.cbSeg;
1917
1918 ADVANCE_SEGMENT_BUFFER(pIoTask->DataSeg.cbSeg);
1919
1920 /* Send it off to the I/O manager. */
1921 pdmacFileEpAddTask(pEndpoint, pIoTask);
1922 }
1923 }
1924 }
1925 }
1926
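 /* fCompleted was set while the request was processed so that segments
  * finishing early could not complete the task prematurely. Clear it again;
  * the task is completed here only if nothing is left to transfer and the
  * atomic exchange shows that no other path has completed it yet. */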
1927 ASMAtomicWriteBool(&pTask->fCompleted, false);
1928
1929 if (ASMAtomicReadS32(&pTask->cbTransferLeft) == 0
1930 && !ASMAtomicXchgBool(&pTask->fCompleted, true))
1931 {
1932 pdmR3AsyncCompletionCompleteTask(&pTask->Core);
1933
1934 /* Complete a pending flush if all writes have completed */
1935 uint32_t cWritesOutstanding = ASMAtomicDecU32(&pEndpointCache->cWritesOutstanding);
1936 PPDMASYNCCOMPLETIONTASKFILE pTaskFlush = (PPDMASYNCCOMPLETIONTASKFILE)ASMAtomicXchgPtr((void * volatile *)&pEndpointCache->pTaskFlush, NULL);
1937
1938 if (!cWritesOutstanding && pTaskFlush)
1939 pdmR3AsyncCompletionCompleteTask(&pTaskFlush->Core);
1940 }
1941
1942 return VINF_SUCCESS;
1943}
1944
1945#undef ADVANCE_SEGMENT_BUFFER
1946
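/**
 * Flush handling for the cache.
 *
 * Only one flush may be pending per endpoint (VERR_RESOURCE_BUSY otherwise).
 * If writes are still outstanding the task is parked in pTaskFlush and is
 * completed by the write path once the last outstanding write finishes;
 * otherwise it completes immediately.
 */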
1947int pdmacFileEpCacheFlush(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask)
1948{
1949 int rc = VINF_SUCCESS;
1950
1951 if (ASMAtomicReadPtr((void * volatile *)&pEndpoint->DataCache.pTaskFlush))
1952 rc = VERR_RESOURCE_BUSY;
1953 else
1954 {
1955 if (ASMAtomicReadU32(&pEndpoint->DataCache.cWritesOutstanding) > 0)
1956 ASMAtomicWritePtr((void * volatile *)&pEndpoint->DataCache.pTaskFlush, pTask);
1957 else
1958 pdmR3AsyncCompletionCompleteTask(&pTask->Core);
1959 }
1960
1961 return rc;
1962}
1963