VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/FTM.cpp@ 50653

Last change on this file since 50653 was 47786, checked in by vboxsync, 11 years ago

PGM: Added a new page type for the VT-x APIC access page MMIO alias instead of abusing the MMIO2 aliasing. There are important differences, we can safely access the MMIO2 page when aliased and save time doing so, while the alias created by IOMMMIOMapMMIOHCPage must not be accessed outside the VT-x execution AFAIK.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 42.3 KB
Line 
1/* $Id: FTM.cpp 47786 2013-08-16 08:59:32Z vboxsync $ */
2/** @file
3 * FTM - Fault Tolerance Manager
4 */
5
6/*
7 * Copyright (C) 2010-2013 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_FTM
23#include <VBox/vmm/ftm.h>
24#include <VBox/vmm/em.h>
25#include <VBox/vmm/pdm.h>
26#include <VBox/vmm/pgm.h>
27#include <VBox/vmm/ssm.h>
28#include <VBox/vmm/vmm.h>
29#include "FTMInternal.h"
30#include <VBox/vmm/vm.h>
31#include <VBox/vmm/uvm.h>
32#include <VBox/err.h>
33#include <VBox/param.h>
34#include <VBox/log.h>
35
36#include <iprt/assert.h>
37#include <iprt/thread.h>
38#include <iprt/string.h>
39#include <iprt/mem.h>
40#include <iprt/tcp.h>
41#include <iprt/socket.h>
42#include <iprt/semaphore.h>
43#include <iprt/asm.h>
44
45#include "internal/pgm.h"
46
47
48/*******************************************************************************
49 * Structures and Typedefs *
50 *******************************************************************************/
51
/**
 * Framing header placed in front of every chunk of SSM stream data that is
 * sent over the TCP connection.
 *
 * The extra layer exists so the receiver can tell where the SSM stream ends.
 */
typedef struct FTMTCPHDR
{
    /** Magic value (FTMTCPHDR_MAGIC). */
    uint32_t    u32Magic;
    /** Number of payload bytes following this header.
     * 0 marks the end of the stream, UINT32_MAX signals cancellation. */
    uint32_t    cb;
} FTMTCPHDR;
/** Magic value for FTMTCPHDR::u32Magic. (Egberto Gismonti Amin) */
#define FTMTCPHDR_MAGIC         UINT32_C(0x19471205)
/** The max block size. */
#define FTMTCPHDR_MAX_SIZE      UINT32_C(0x00fffff8)
71
72/**
73 * TCP stream header.
74 *
75 * This is an extra layer for fixing the problem with figuring out when the SSM
76 * stream ends.
77 */
78typedef struct FTMTCPHDRMEM
79{
80 /** Magic value. */
81 uint32_t u32Magic;
82 /** Size (Uncompressed) of the pages following the header. */
83 uint32_t cbPageRange;
84 /** GC Physical address of the page(s) to sync. */
85 RTGCPHYS GCPhys;
86 /** The size of the data block following this header.
87 * 0 indicates the end of the stream, while UINT32_MAX indicates
88 * cancelation. */
89 uint32_t cb;
90} FTMTCPHDRMEM;
91
92/*******************************************************************************
93* Global Variables *
94*******************************************************************************/
95static const char g_szWelcome[] = "VirtualBox-Fault-Tolerance-Sync-1.0\n";
96
97static DECLCALLBACK(int) ftmR3PageTreeDestroyCallback(PAVLGCPHYSNODECORE pBaseNode, void *pvUser);
98
99/**
100 * Initializes the FTM.
101 *
102 * @returns VBox status code.
103 * @param pVM Pointer to the VM.
104 */
105VMMR3_INT_DECL(int) FTMR3Init(PVM pVM)
106{
107 /*
108 * Assert alignment and sizes.
109 */
110 AssertCompile(sizeof(pVM->ftm.s) <= sizeof(pVM->ftm.padding));
111 AssertCompileMemberAlignment(FTM, CritSect, sizeof(uintptr_t));
112
113 /** @todo saved state for master nodes! */
114 pVM->ftm.s.pszAddress = NULL;
115 pVM->ftm.s.pszPassword = NULL;
116 pVM->fFaultTolerantMaster = false;
117 pVM->ftm.s.fIsStandbyNode = false;
118 pVM->ftm.s.standby.hServer = NIL_RTTCPSERVER;
119 pVM->ftm.s.hShutdownEvent = NIL_RTSEMEVENT;
120 pVM->ftm.s.hSocket = NIL_RTSOCKET;
121
122 /*
123 * Initialize the PGM critical section.
124 */
125 int rc = PDMR3CritSectInit(pVM, &pVM->ftm.s.CritSect, RT_SRC_POS, "FTM");
126 AssertRCReturn(rc, rc);
127
128 /*
129 * Register statistics.
130 */
131 STAM_REL_REG(pVM, &pVM->ftm.s.StatReceivedMem, STAMTYPE_COUNTER, "/FT/Received/Mem", STAMUNIT_BYTES, "The amount of memory pages that was received.");
132 STAM_REL_REG(pVM, &pVM->ftm.s.StatReceivedState, STAMTYPE_COUNTER, "/FT/Received/State", STAMUNIT_BYTES, "The amount of state information that was received.");
133 STAM_REL_REG(pVM, &pVM->ftm.s.StatSentMem, STAMTYPE_COUNTER, "/FT/Sent/Mem", STAMUNIT_BYTES, "The amount of memory pages that was sent.");
134 STAM_REL_REG(pVM, &pVM->ftm.s.StatSentState, STAMTYPE_COUNTER, "/FT/Sent/State", STAMUNIT_BYTES, "The amount of state information that was sent.");
135 STAM_REL_REG(pVM, &pVM->ftm.s.StatDeltaVM, STAMTYPE_COUNTER, "/FT/Sync/DeltaVM", STAMUNIT_OCCURENCES, "Number of delta vm syncs.");
136 STAM_REL_REG(pVM, &pVM->ftm.s.StatFullSync, STAMTYPE_COUNTER, "/FT/Sync/Full", STAMUNIT_OCCURENCES, "Number of full vm syncs.");
137 STAM_REL_REG(pVM, &pVM->ftm.s.StatDeltaMem, STAMTYPE_COUNTER, "/FT/Sync/DeltaMem", STAMUNIT_OCCURENCES, "Number of delta mem syncs.");
138 STAM_REL_REG(pVM, &pVM->ftm.s.StatCheckpointStorage, STAMTYPE_COUNTER, "/FT/Checkpoint/Storage", STAMUNIT_OCCURENCES, "Number of storage checkpoints.");
139 STAM_REL_REG(pVM, &pVM->ftm.s.StatCheckpointNetwork, STAMTYPE_COUNTER, "/FT/Checkpoint/Network", STAMUNIT_OCCURENCES, "Number of network checkpoints.");
140#ifdef VBOX_WITH_STATISTICS
141 STAM_REG(pVM, &pVM->ftm.s.StatCheckpoint, STAMTYPE_PROFILE, "/FT/Checkpoint", STAMUNIT_TICKS_PER_CALL, "Profiling of FTMR3SetCheckpoint.");
142 STAM_REG(pVM, &pVM->ftm.s.StatCheckpointPause, STAMTYPE_PROFILE, "/FT/Checkpoint/Pause", STAMUNIT_TICKS_PER_CALL, "Profiling of FTMR3SetCheckpoint.");
143 STAM_REG(pVM, &pVM->ftm.s.StatCheckpointResume, STAMTYPE_PROFILE, "/FT/Checkpoint/Resume", STAMUNIT_TICKS_PER_CALL, "Profiling of FTMR3SetCheckpoint.");
144 STAM_REG(pVM, &pVM->ftm.s.StatSentMemRAM, STAMTYPE_COUNTER, "/FT/Sent/Mem/RAM", STAMUNIT_BYTES, "The amount of memory pages that was sent.");
145 STAM_REG(pVM, &pVM->ftm.s.StatSentMemMMIO2, STAMTYPE_COUNTER, "/FT/Sent/Mem/MMIO2", STAMUNIT_BYTES, "The amount of memory pages that was sent.");
146 STAM_REG(pVM, &pVM->ftm.s.StatSentMemShwROM, STAMTYPE_COUNTER, "/FT/Sent/Mem/ShwROM", STAMUNIT_BYTES, "The amount of memory pages that was sent.");
147 STAM_REG(pVM, &pVM->ftm.s.StatSentStateWrite, STAMTYPE_COUNTER, "/FT/Sent/State/Writes", STAMUNIT_BYTES, "The nr of write calls.");
148#endif
149 return VINF_SUCCESS;
150}
151
152/**
153 * Terminates the FTM.
154 *
155 * Termination means cleaning up and freeing all resources,
156 * the VM itself is at this point powered off or suspended.
157 *
158 * @returns VBox status code.
159 * @param pVM Pointer to the VM.
160 */
161VMMR3_INT_DECL(int) FTMR3Term(PVM pVM)
162{
163 if (pVM->ftm.s.hShutdownEvent != NIL_RTSEMEVENT)
164 {
165 RTSemEventDestroy(pVM->ftm.s.hShutdownEvent);
166 pVM->ftm.s.hShutdownEvent = NIL_RTSEMEVENT;
167 }
168 if (pVM->ftm.s.hSocket != NIL_RTSOCKET)
169 {
170 RTTcpClientClose(pVM->ftm.s.hSocket);
171 pVM->ftm.s.hSocket = NIL_RTSOCKET;
172 }
173 if (pVM->ftm.s.standby.hServer)
174 {
175 RTTcpServerDestroy(pVM->ftm.s.standby.hServer);
176 pVM->ftm.s.standby.hServer = NULL;
177 }
178 if (pVM->ftm.s.pszAddress)
179 RTMemFree(pVM->ftm.s.pszAddress);
180 if (pVM->ftm.s.pszPassword)
181 RTMemFree(pVM->ftm.s.pszPassword);
182
183 /* Remove all pending memory updates. */
184 if (pVM->ftm.s.standby.pPhysPageTree)
185 {
186 RTAvlGCPhysDestroy(&pVM->ftm.s.standby.pPhysPageTree, ftmR3PageTreeDestroyCallback, NULL);
187 pVM->ftm.s.standby.pPhysPageTree = NULL;
188 }
189
190 pVM->ftm.s.pszAddress = NULL;
191 pVM->ftm.s.pszPassword = NULL;
192
193 PDMR3CritSectDelete(&pVM->ftm.s.CritSect);
194 return VINF_SUCCESS;
195}
196
197
198static int ftmR3TcpWriteACK(PVM pVM)
199{
200 int rc = RTTcpWrite(pVM->ftm.s.hSocket, RT_STR_TUPLE("ACK\n"));
201 if (RT_FAILURE(rc))
202 {
203 LogRel(("FTSync: RTTcpWrite(,ACK,) -> %Rrc\n", rc));
204 }
205 return rc;
206}
207
208
209static int ftmR3TcpWriteNACK(PVM pVM, int32_t rc2, const char *pszMsgText = NULL)
210{
211 char szMsg[256];
212 size_t cch;
213 if (pszMsgText && *pszMsgText)
214 {
215 cch = RTStrPrintf(szMsg, sizeof(szMsg), "NACK=%d;%s\n", rc2, pszMsgText);
216 for (size_t off = 6; off + 1 < cch; off++)
217 if (szMsg[off] == '\n')
218 szMsg[off] = '\r';
219 }
220 else
221 cch = RTStrPrintf(szMsg, sizeof(szMsg), "NACK=%d\n", rc2);
222 int rc = RTTcpWrite(pVM->ftm.s.hSocket, szMsg, cch);
223 if (RT_FAILURE(rc))
224 LogRel(("FTSync: RTTcpWrite(,%s,%zu) -> %Rrc\n", szMsg, cch, rc));
225 return rc;
226}
227
228/**
229 * Reads a string from the socket.
230 *
231 * @returns VBox status code.
232 *
233 * @param pState The teleporter state structure.
234 * @param pszBuf The output buffer.
235 * @param cchBuf The size of the output buffer.
236 *
237 */
238static int ftmR3TcpReadLine(PVM pVM, char *pszBuf, size_t cchBuf)
239{
240 char *pszStart = pszBuf;
241 RTSOCKET Sock = pVM->ftm.s.hSocket;
242
243 AssertReturn(cchBuf > 1, VERR_INTERNAL_ERROR);
244 *pszBuf = '\0';
245
246 /* dead simple approach. */
247 for (;;)
248 {
249 char ch;
250 int rc = RTTcpRead(Sock, &ch, sizeof(ch), NULL);
251 if (RT_FAILURE(rc))
252 {
253 LogRel(("FTSync: RTTcpRead -> %Rrc while reading string ('%s')\n", rc, pszStart));
254 return rc;
255 }
256 if ( ch == '\n'
257 || ch == '\0')
258 return VINF_SUCCESS;
259 if (cchBuf <= 1)
260 {
261 LogRel(("FTSync: String buffer overflow: '%s'\n", pszStart));
262 return VERR_BUFFER_OVERFLOW;
263 }
264 *pszBuf++ = ch;
265 *pszBuf = '\0';
266 cchBuf--;
267 }
268}
269
270/**
271 * Reads an ACK or NACK.
272 *
273 * @returns VBox status code.
274 * @param pVM Pointer to the VM.
275 * @param pszWhich Which ACK is this this?
276 * @param pszNAckMsg Optional NACK message.
277 */
278static int ftmR3TcpReadACK(PVM pVM, const char *pszWhich, const char *pszNAckMsg = NULL)
279{
280 char szMsg[256];
281 int rc = ftmR3TcpReadLine(pVM, szMsg, sizeof(szMsg));
282 if (RT_FAILURE(rc))
283 return rc;
284
285 if (!strcmp(szMsg, "ACK"))
286 return VINF_SUCCESS;
287
288 if (!strncmp(szMsg, RT_STR_TUPLE("NACK=")))
289 {
290 char *pszMsgText = strchr(szMsg, ';');
291 if (pszMsgText)
292 *pszMsgText++ = '\0';
293
294 int32_t vrc2;
295 rc = RTStrToInt32Full(&szMsg[sizeof("NACK=") - 1], 10, &vrc2);
296 if (rc == VINF_SUCCESS)
297 {
298 /*
299 * Well formed NACK, transform it into an error.
300 */
301 if (pszNAckMsg)
302 {
303 LogRel(("FTSync: %s: NACK=%Rrc (%d)\n", pszWhich, vrc2, vrc2));
304 return VERR_INTERNAL_ERROR;
305 }
306
307 if (pszMsgText)
308 {
309 pszMsgText = RTStrStrip(pszMsgText);
310 for (size_t off = 0; pszMsgText[off]; off++)
311 if (pszMsgText[off] == '\r')
312 pszMsgText[off] = '\n';
313
314 LogRel(("FTSync: %s: NACK=%Rrc (%d) - '%s'\n", pszWhich, vrc2, vrc2, pszMsgText));
315 }
316 return VERR_INTERNAL_ERROR_2;
317 }
318
319 if (pszMsgText)
320 pszMsgText[-1] = ';';
321 }
322 return VERR_INTERNAL_ERROR_3;
323}
324
325/**
326 * Submitts a command to the destination and waits for the ACK.
327 *
328 * @returns VBox status code.
329 *
330 * @param pVM Pointer to the VM.
331 * @param pszCommand The command.
332 * @param fWaitForAck Whether to wait for the ACK.
333 */
334static int ftmR3TcpSubmitCommand(PVM pVM, const char *pszCommand, bool fWaitForAck = true)
335{
336 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, pszCommand, strlen(pszCommand), RT_STR_TUPLE("\n"));
337 if (RT_FAILURE(rc))
338 return rc;
339 if (!fWaitForAck)
340 return VINF_SUCCESS;
341 return ftmR3TcpReadACK(pVM, pszCommand);
342}
343
344/**
345 * @copydoc SSMSTRMOPS::pfnWrite
346 */
347static DECLCALLBACK(int) ftmR3TcpOpWrite(void *pvUser, uint64_t offStream, const void *pvBuf, size_t cbToWrite)
348{
349 PVM pVM = (PVM)pvUser;
350 NOREF(offStream);
351
352 AssertReturn(cbToWrite > 0, VINF_SUCCESS);
353 AssertReturn(cbToWrite < UINT32_MAX, VERR_OUT_OF_RANGE);
354 AssertReturn(pVM->fFaultTolerantMaster, VERR_INVALID_HANDLE);
355
356 STAM_COUNTER_INC(&pVM->ftm.s.StatSentStateWrite);
357 for (;;)
358 {
359 FTMTCPHDR Hdr;
360 Hdr.u32Magic = FTMTCPHDR_MAGIC;
361 Hdr.cb = RT_MIN((uint32_t)cbToWrite, FTMTCPHDR_MAX_SIZE);
362 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, &Hdr, sizeof(Hdr), pvBuf, (size_t)Hdr.cb);
363 if (RT_FAILURE(rc))
364 {
365 LogRel(("FTSync/TCP: Write error: %Rrc (cb=%#x)\n", rc, Hdr.cb));
366 return rc;
367 }
368 pVM->ftm.s.StatSentState.c += Hdr.cb + sizeof(Hdr);
369 pVM->ftm.s.syncstate.uOffStream += Hdr.cb;
370 if (Hdr.cb == cbToWrite)
371 return VINF_SUCCESS;
372
373 /* advance */
374 cbToWrite -= Hdr.cb;
375 pvBuf = (uint8_t const *)pvBuf + Hdr.cb;
376 }
377}
378
379
380/**
381 * Selects and poll for close condition.
382 *
383 * We can use a relatively high poll timeout here since it's only used to get
384 * us out of error paths. In the normal cause of events, we'll get a
385 * end-of-stream header.
386 *
387 * @returns VBox status code.
388 *
389 * @param pState The teleporter state data.
390 */
391static int ftmR3TcpReadSelect(PVM pVM)
392{
393 int rc;
394 do
395 {
396 rc = RTTcpSelectOne(pVM->ftm.s.hSocket, 1000);
397 if (RT_FAILURE(rc) && rc != VERR_TIMEOUT)
398 {
399 pVM->ftm.s.syncstate.fIOError = true;
400 LogRel(("FTSync/TCP: Header select error: %Rrc\n", rc));
401 break;
402 }
403 if (pVM->ftm.s.syncstate.fStopReading)
404 {
405 rc = VERR_EOF;
406 break;
407 }
408 } while (rc == VERR_TIMEOUT);
409 return rc;
410}
411
412
413/**
414 * @copydoc SSMSTRMOPS::pfnRead
415 */
416static DECLCALLBACK(int) ftmR3TcpOpRead(void *pvUser, uint64_t offStream, void *pvBuf, size_t cbToRead, size_t *pcbRead)
417{
418 PVM pVM = (PVM)pvUser;
419 AssertReturn(!pVM->fFaultTolerantMaster, VERR_INVALID_HANDLE);
420 NOREF(offStream);
421
422 for (;;)
423 {
424 int rc;
425
426 /*
427 * Check for various conditions and may have been signalled.
428 */
429 if (pVM->ftm.s.syncstate.fEndOfStream)
430 return VERR_EOF;
431 if (pVM->ftm.s.syncstate.fStopReading)
432 return VERR_EOF;
433 if (pVM->ftm.s.syncstate.fIOError)
434 return VERR_IO_GEN_FAILURE;
435
436 /*
437 * If there is no more data in the current block, read the next
438 * block header.
439 */
440 if (!pVM->ftm.s.syncstate.cbReadBlock)
441 {
442 rc = ftmR3TcpReadSelect(pVM);
443 if (RT_FAILURE(rc))
444 return rc;
445 FTMTCPHDR Hdr;
446 rc = RTTcpRead(pVM->ftm.s.hSocket, &Hdr, sizeof(Hdr), NULL);
447 if (RT_FAILURE(rc))
448 {
449 pVM->ftm.s.syncstate.fIOError = true;
450 LogRel(("FTSync/TCP: Header read error: %Rrc\n", rc));
451 return rc;
452 }
453 pVM->ftm.s.StatReceivedState.c += sizeof(Hdr);
454
455 if (RT_UNLIKELY( Hdr.u32Magic != FTMTCPHDR_MAGIC
456 || Hdr.cb > FTMTCPHDR_MAX_SIZE
457 || Hdr.cb == 0))
458 {
459 if ( Hdr.u32Magic == FTMTCPHDR_MAGIC
460 && ( Hdr.cb == 0
461 || Hdr.cb == UINT32_MAX)
462 )
463 {
464 pVM->ftm.s.syncstate.fEndOfStream = true;
465 pVM->ftm.s.syncstate.cbReadBlock = 0;
466 return Hdr.cb ? VERR_SSM_CANCELLED : VERR_EOF;
467 }
468 pVM->ftm.s.syncstate.fIOError = true;
469 LogRel(("FTSync/TCP: Invalid block: u32Magic=%#x cb=%#x\n", Hdr.u32Magic, Hdr.cb));
470 return VERR_IO_GEN_FAILURE;
471 }
472
473 pVM->ftm.s.syncstate.cbReadBlock = Hdr.cb;
474 if (pVM->ftm.s.syncstate.fStopReading)
475 return VERR_EOF;
476 }
477
478 /*
479 * Read more data.
480 */
481 rc = ftmR3TcpReadSelect(pVM);
482 if (RT_FAILURE(rc))
483 return rc;
484
485 uint32_t cb = (uint32_t)RT_MIN(pVM->ftm.s.syncstate.cbReadBlock, cbToRead);
486 rc = RTTcpRead(pVM->ftm.s.hSocket, pvBuf, cb, pcbRead);
487 if (RT_FAILURE(rc))
488 {
489 pVM->ftm.s.syncstate.fIOError = true;
490 LogRel(("FTSync/TCP: Data read error: %Rrc (cb=%#x)\n", rc, cb));
491 return rc;
492 }
493 if (pcbRead)
494 {
495 cb = (uint32_t)*pcbRead;
496 pVM->ftm.s.StatReceivedState.c += cb;
497 pVM->ftm.s.syncstate.uOffStream += cb;
498 pVM->ftm.s.syncstate.cbReadBlock -= cb;
499 return VINF_SUCCESS;
500 }
501 pVM->ftm.s.StatReceivedState.c += cb;
502 pVM->ftm.s.syncstate.uOffStream += cb;
503 pVM->ftm.s.syncstate.cbReadBlock -= cb;
504 if (cbToRead == cb)
505 return VINF_SUCCESS;
506
507 /* Advance to the next block. */
508 cbToRead -= cb;
509 pvBuf = (uint8_t *)pvBuf + cb;
510 }
511}
512
513
514/**
515 * @copydoc SSMSTRMOPS::pfnSeek
516 */
517static DECLCALLBACK(int) ftmR3TcpOpSeek(void *pvUser, int64_t offSeek, unsigned uMethod, uint64_t *poffActual)
518{
519 NOREF(pvUser); NOREF(offSeek); NOREF(uMethod); NOREF(poffActual);
520 return VERR_NOT_SUPPORTED;
521}
522
523
524/**
525 * @copydoc SSMSTRMOPS::pfnTell
526 */
527static DECLCALLBACK(uint64_t) ftmR3TcpOpTell(void *pvUser)
528{
529 PVM pVM = (PVM)pvUser;
530 return pVM->ftm.s.syncstate.uOffStream;
531}
532
533
534/**
535 * @copydoc SSMSTRMOPS::pfnSize
536 */
537static DECLCALLBACK(int) ftmR3TcpOpSize(void *pvUser, uint64_t *pcb)
538{
539 NOREF(pvUser); NOREF(pcb);
540 return VERR_NOT_SUPPORTED;
541}
542
543
544/**
545 * @copydoc SSMSTRMOPS::pfnIsOk
546 */
547static DECLCALLBACK(int) ftmR3TcpOpIsOk(void *pvUser)
548{
549 PVM pVM = (PVM)pvUser;
550
551 if (pVM->fFaultTolerantMaster)
552 {
553 /* Poll for incoming NACKs and errors from the other side */
554 int rc = RTTcpSelectOne(pVM->ftm.s.hSocket, 0);
555 if (rc != VERR_TIMEOUT)
556 {
557 if (RT_SUCCESS(rc))
558 {
559 LogRel(("FTSync/TCP: Incoming data detect by IsOk, assuming it is a cancellation NACK.\n"));
560 rc = VERR_SSM_CANCELLED;
561 }
562 else
563 LogRel(("FTSync/TCP: RTTcpSelectOne -> %Rrc (IsOk).\n", rc));
564 return rc;
565 }
566 }
567
568 return VINF_SUCCESS;
569}
570
571
572/**
573 * @copydoc SSMSTRMOPS::pfnClose
574 */
575static DECLCALLBACK(int) ftmR3TcpOpClose(void *pvUser, bool fCanceled)
576{
577 PVM pVM = (PVM)pvUser;
578
579 if (pVM->fFaultTolerantMaster)
580 {
581 FTMTCPHDR EofHdr;
582 EofHdr.u32Magic = FTMTCPHDR_MAGIC;
583 EofHdr.cb = fCanceled ? UINT32_MAX : 0;
584 int rc = RTTcpWrite(pVM->ftm.s.hSocket, &EofHdr, sizeof(EofHdr));
585 if (RT_FAILURE(rc))
586 {
587 LogRel(("FTSync/TCP: EOF Header write error: %Rrc\n", rc));
588 return rc;
589 }
590 }
591 else
592 {
593 ASMAtomicWriteBool(&pVM->ftm.s.syncstate.fStopReading, true);
594 }
595
596 return VINF_SUCCESS;
597}
598
599
600/**
601 * Method table for a TCP based stream.
602 */
603static SSMSTRMOPS const g_ftmR3TcpOps =
604{
605 SSMSTRMOPS_VERSION,
606 ftmR3TcpOpWrite,
607 ftmR3TcpOpRead,
608 ftmR3TcpOpSeek,
609 ftmR3TcpOpTell,
610 ftmR3TcpOpSize,
611 ftmR3TcpOpIsOk,
612 ftmR3TcpOpClose,
613 SSMSTRMOPS_VERSION
614};
615
616
617/**
618 * VMR3ReqCallWait callback
619 *
620 * @param pVM Pointer to the VM.
621 *
622 */
623static DECLCALLBACK(void) ftmR3WriteProtectMemory(PVM pVM)
624{
625 int rc = PGMR3PhysWriteProtectRAM(pVM);
626 AssertRC(rc);
627}
628
629
630/**
631 * Sync the VM state
632 *
633 * @returns VBox status code.
634 * @param pVM Pointer to the VM.
635 */
636static int ftmR3PerformFullSync(PVM pVM)
637{
638 bool fSuspended = false;
639
640 int rc = VMR3Suspend(pVM->pUVM, VMSUSPENDREASON_FTM_SYNC);
641 AssertRCReturn(rc, rc);
642
643 STAM_REL_COUNTER_INC(&pVM->ftm.s.StatFullSync);
644
645 RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */
646
647 /* Reset the sync state. */
648 pVM->ftm.s.syncstate.uOffStream = 0;
649 pVM->ftm.s.syncstate.cbReadBlock = 0;
650 pVM->ftm.s.syncstate.fStopReading = false;
651 pVM->ftm.s.syncstate.fIOError = false;
652 pVM->ftm.s.syncstate.fEndOfStream = false;
653
654 rc = ftmR3TcpSubmitCommand(pVM, "full-sync");
655 AssertRC(rc);
656
657 pVM->ftm.s.fDeltaLoadSaveActive = false;
658 rc = VMR3SaveFT(pVM->pUVM, &g_ftmR3TcpOps, pVM, &fSuspended, false /* fSkipStateChanges */);
659 AssertRC(rc);
660
661 rc = ftmR3TcpReadACK(pVM, "full-sync-complete");
662 AssertRC(rc);
663
664 RTSocketRelease(pVM->ftm.s.hSocket);
665
666 /* Write protect all memory. */
667 rc = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)ftmR3WriteProtectMemory, 1, pVM);
668 AssertRCReturn(rc, rc);
669
670 rc = VMR3Resume(pVM->pUVM, VMRESUMEREASON_FTM_SYNC);
671 AssertRC(rc);
672
673 return rc;
674}
675
676
677/**
678 * PGMR3PhysEnumDirtyFTPages callback for syncing dirty physical pages
679 *
680 * @param pVM Pointer to the VM.
681 * @param GCPhys GC physical address
682 * @param pRange HC virtual address of the page(s)
683 * @param cbRange Size of the dirty range in bytes.
684 * @param pvUser User argument
685 */
686static DECLCALLBACK(int) ftmR3SyncDirtyPage(PVM pVM, RTGCPHYS GCPhys, uint8_t *pRange, unsigned cbRange, void *pvUser)
687{
688 NOREF(pvUser);
689 FTMTCPHDRMEM Hdr;
690 Hdr.u32Magic = FTMTCPHDR_MAGIC;
691 Hdr.GCPhys = GCPhys;
692 Hdr.cbPageRange = cbRange;
693 Hdr.cb = cbRange;
694 /** @todo compress page(s). */
695 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, &Hdr, sizeof(Hdr), pRange, (size_t)Hdr.cb);
696 if (RT_FAILURE(rc))
697 {
698 LogRel(("FTSync/TCP: Write error (ftmR3SyncDirtyPage): %Rrc (cb=%#x)\n", rc, Hdr.cb));
699 return rc;
700 }
701 pVM->ftm.s.StatSentMem.c += Hdr.cb + sizeof(Hdr);
702
703#ifdef VBOX_WITH_STATISTICS
704 switch (PGMPhysGetPageType(pVM, GCPhys))
705 {
706 case PGMPAGETYPE_RAM:
707 pVM->ftm.s.StatSentMemRAM.c += Hdr.cb + sizeof(Hdr);
708 break;
709
710 case PGMPAGETYPE_MMIO2:
711 pVM->ftm.s.StatSentMemMMIO2.c += Hdr.cb + sizeof(Hdr);
712 break;
713
714 case PGMPAGETYPE_ROM_SHADOW:
715 pVM->ftm.s.StatSentMemShwROM.c += Hdr.cb + sizeof(Hdr);
716 break;
717
718 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
719 case PGMPAGETYPE_SPECIAL_ALIAS_MMIO:
720 AssertFailed();
721 break;
722
723 default:
724 AssertFailed();
725 break;
726 }
727#endif
728
729 return (pVM->ftm.s.fCheckpointingActive) ? VERR_INTERRUPTED : VINF_SUCCESS;
730}
731
732/**
733 * Thread function which starts syncing process for this master VM
734 *
735 * @param hThread The thread handle.
736 * @param pvUser Pointer to the VM.
737 * @return VINF_SUCCESS (ignored).
738 *
739 */
740static DECLCALLBACK(int) ftmR3MasterThread(RTTHREAD hThread, void *pvUser)
741{
742 int rc = VINF_SUCCESS;
743 PVM pVM = (PVM)pvUser;
744 NOREF(hThread);
745
746 for (;;)
747 {
748 /*
749 * Try connect to the standby machine.
750 */
751 Log(("ftmR3MasterThread: client connect to %s %d\n", pVM->ftm.s.pszAddress, pVM->ftm.s.uPort));
752 rc = RTTcpClientConnect(pVM->ftm.s.pszAddress, pVM->ftm.s.uPort, &pVM->ftm.s.hSocket);
753 if (RT_SUCCESS(rc))
754 {
755 Log(("ftmR3MasterThread: CONNECTED\n"));
756
757 /* Disable Nagle. */
758 rc = RTTcpSetSendCoalescing(pVM->ftm.s.hSocket, false /*fEnable*/);
759 AssertRC(rc);
760
761 /* Read and check the welcome message. */
762 char szLine[RT_MAX(128, sizeof(g_szWelcome))];
763 RT_ZERO(szLine);
764 rc = RTTcpRead(pVM->ftm.s.hSocket, szLine, sizeof(g_szWelcome) - 1, NULL);
765 if ( RT_SUCCESS(rc)
766 && !strcmp(szLine, g_szWelcome))
767 {
768 /* password */
769 if (pVM->ftm.s.pszPassword)
770 rc = RTTcpWrite(pVM->ftm.s.hSocket, pVM->ftm.s.pszPassword, strlen(pVM->ftm.s.pszPassword));
771
772 if (RT_SUCCESS(rc))
773 {
774 /* ACK */
775 rc = ftmR3TcpReadACK(pVM, "password", "Invalid password");
776 if (RT_SUCCESS(rc))
777 {
778 /** todo: verify VM config. */
779 break;
780 }
781 }
782 }
783 /* Failed, so don't bother anymore. */
784 return VINF_SUCCESS;
785 }
786 rc = RTSemEventWait(pVM->ftm.s.hShutdownEvent, 1000 /* 1 second */);
787 if (rc != VERR_TIMEOUT)
788 return VINF_SUCCESS; /* told to quit */
789 }
790
791 /* Successfully initialized the connection to the standby node.
792 * Start the sync process.
793 */
794
795 /* First sync all memory and write protect everything so
796 * we can send changed pages later on.
797 */
798
799 rc = ftmR3PerformFullSync(pVM);
800
801 for (;;)
802 {
803 rc = RTSemEventWait(pVM->ftm.s.hShutdownEvent, pVM->ftm.s.uInterval);
804 if (rc != VERR_TIMEOUT)
805 break; /* told to quit */
806
807 if (!pVM->ftm.s.fCheckpointingActive)
808 {
809 rc = PDMCritSectEnter(&pVM->ftm.s.CritSect, VERR_SEM_BUSY);
810 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
811
812 rc = ftmR3TcpSubmitCommand(pVM, "mem-sync");
813 AssertRC(rc);
814
815 /* sync the changed memory with the standby node. */
816 /* Write protect all memory. */
817 if (!pVM->ftm.s.fCheckpointingActive)
818 {
819 rc = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)ftmR3WriteProtectMemory, 1, pVM);
820 AssertRC(rc);
821 }
822
823 /* Enumerate all dirty pages and send them to the standby VM. */
824 if (!pVM->ftm.s.fCheckpointingActive)
825 {
826 rc = PGMR3PhysEnumDirtyFTPages(pVM, ftmR3SyncDirtyPage, NULL /* pvUser */);
827 Assert(rc == VINF_SUCCESS || rc == VERR_INTERRUPTED);
828 }
829
830 /* Send last memory header to signal the end. */
831 FTMTCPHDRMEM Hdr;
832 Hdr.u32Magic = FTMTCPHDR_MAGIC;
833 Hdr.GCPhys = 0;
834 Hdr.cbPageRange = 0;
835 Hdr.cb = 0;
836 rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 1, &Hdr, sizeof(Hdr));
837 if (RT_FAILURE(rc))
838 LogRel(("FTSync/TCP: Write error (ftmR3MasterThread): %Rrc (cb=%#x)\n", rc, Hdr.cb));
839
840 rc = ftmR3TcpReadACK(pVM, "mem-sync-complete");
841 AssertRC(rc);
842
843 PDMCritSectLeave(&pVM->ftm.s.CritSect);
844 }
845 }
846 return rc;
847}
848
849/**
850 * Syncs memory from the master VM
851 *
852 * @returns VBox status code.
853 * @param pVM Pointer to the VM.
854 */
855static int ftmR3SyncMem(PVM pVM)
856{
857 while (true)
858 {
859 FTMTCPHDRMEM Hdr;
860 RTGCPHYS GCPhys;
861
862 /* Read memory header. */
863 int rc = RTTcpRead(pVM->ftm.s.hSocket, &Hdr, sizeof(Hdr), NULL);
864 if (RT_FAILURE(rc))
865 {
866 Log(("RTTcpRead failed with %Rrc\n", rc));
867 break;
868 }
869 pVM->ftm.s.StatReceivedMem.c += sizeof(Hdr);
870
871 if (Hdr.cb == 0)
872 break; /* end of sync. */
873
874 Assert(Hdr.cb == Hdr.cbPageRange); /** @todo uncompress */
875 GCPhys = Hdr.GCPhys;
876
877 /* Must be a multiple of PAGE_SIZE. */
878 Assert((Hdr.cbPageRange & 0xfff) == 0);
879
880 while (Hdr.cbPageRange)
881 {
882 PFTMPHYSPAGETREENODE pNode = (PFTMPHYSPAGETREENODE)RTAvlGCPhysGet(&pVM->ftm.s.standby.pPhysPageTree, GCPhys);
883 if (!pNode)
884 {
885 /* Allocate memory for the node and page. */
886 pNode = (PFTMPHYSPAGETREENODE)RTMemAllocZ(sizeof(*pNode) + PAGE_SIZE);
887 AssertBreak(pNode);
888
889 /* Insert the node into the tree. */
890 pNode->Core.Key = GCPhys;
891 pNode->pPage = (void *)(pNode + 1);
892 bool fRet = RTAvlGCPhysInsert(&pVM->ftm.s.standby.pPhysPageTree, &pNode->Core);
893 Assert(fRet); NOREF(fRet);
894 }
895
896 /* Fetch the page. */
897 rc = RTTcpRead(pVM->ftm.s.hSocket, pNode->pPage, PAGE_SIZE, NULL);
898 if (RT_FAILURE(rc))
899 {
900 Log(("RTTcpRead page data (%d bytes) failed with %Rrc\n", Hdr.cb, rc));
901 break;
902 }
903 pVM->ftm.s.StatReceivedMem.c += PAGE_SIZE;
904 Hdr.cbPageRange -= PAGE_SIZE;
905 GCPhys += PAGE_SIZE;
906 }
907 }
908 return VINF_SUCCESS;
909}
910
911
912/**
913 * Callback handler for RTAvlGCPhysDestroy
914 *
915 * @returns 0 to continue, otherwise stop
916 * @param pBaseNode Node to destroy
917 * @param pvUser Pointer to the VM.
918 */
919static DECLCALLBACK(int) ftmR3PageTreeDestroyCallback(PAVLGCPHYSNODECORE pBaseNode, void *pvUser)
920{
921 PVM pVM = (PVM)pvUser;
922 PFTMPHYSPAGETREENODE pNode = (PFTMPHYSPAGETREENODE)pBaseNode;
923
924 if (pVM) /* NULL when the VM is destroyed. */
925 {
926 /* Update the guest memory of the standby VM. */
927 int rc = PGMR3PhysWriteExternal(pVM, pNode->Core.Key, pNode->pPage, PAGE_SIZE, "FTMemSync");
928 AssertRC(rc);
929 }
930 RTMemFree(pNode);
931 return 0;
932}
933
934/**
935 * Thread function which monitors the health of the master VM
936 *
937 * @param hThread The thread handle.
938 * @param pvUser Pointer to the VM.
939 * @return VINF_SUCCESS (ignored).
940 *
941 */
942static DECLCALLBACK(int) ftmR3StandbyThread(RTTHREAD hThread, void *pvUser)
943{
944 PVM pVM = (PVM)pvUser;
945 NOREF(hThread);
946
947 for (;;)
948 {
949 uint64_t u64TimeNow;
950
951 int rc = RTSemEventWait(pVM->ftm.s.hShutdownEvent, pVM->ftm.s.uInterval);
952 if (rc != VERR_TIMEOUT)
953 break; /* told to quit */
954
955 if (pVM->ftm.s.standby.u64LastHeartbeat)
956 {
957 u64TimeNow = RTTimeMilliTS();
958
959 if (u64TimeNow > pVM->ftm.s.standby.u64LastHeartbeat + pVM->ftm.s.uInterval * 4)
960 {
961 /* Timeout; prepare to fallover. */
962 LogRel(("FTSync: TIMEOUT (%RX64 vs %RX64 ms): activate standby VM!\n", u64TimeNow, pVM->ftm.s.standby.u64LastHeartbeat + pVM->ftm.s.uInterval * 2));
963
964 pVM->ftm.s.fActivateStandby = true;
965 /** todo: prevent split-brain. */
966 break;
967 }
968 }
969 }
970
971 return VINF_SUCCESS;
972}
973
974
975/**
976 * Listen for incoming traffic destined for the standby VM.
977 *
978 * @copydoc FNRTTCPSERVE
979 *
980 * @returns VINF_SUCCESS or VERR_TCP_SERVER_STOP.
981 */
982static DECLCALLBACK(int) ftmR3StandbyServeConnection(RTSOCKET Sock, void *pvUser)
983{
984 PVM pVM = (PVM)pvUser;
985
986 pVM->ftm.s.hSocket = Sock;
987
988 /*
989 * Disable Nagle.
990 */
991 int rc = RTTcpSetSendCoalescing(Sock, false /*fEnable*/);
992 AssertRC(rc);
993
994 /* Send the welcome message to the master node. */
995 rc = RTTcpWrite(Sock, g_szWelcome, sizeof(g_szWelcome) - 1);
996 if (RT_FAILURE(rc))
997 {
998 LogRel(("Teleporter: Failed to write welcome message: %Rrc\n", rc));
999 return VINF_SUCCESS;
1000 }
1001
1002 /*
1003 * Password.
1004 */
1005 const char *pszPassword = pVM->ftm.s.pszPassword;
1006 if (pszPassword)
1007 {
1008 unsigned off = 0;
1009 while (pszPassword[off])
1010 {
1011 char ch;
1012 rc = RTTcpRead(Sock, &ch, sizeof(ch), NULL);
1013 if ( RT_FAILURE(rc)
1014 || pszPassword[off] != ch)
1015 {
1016 if (RT_FAILURE(rc))
1017 LogRel(("FTSync: Password read failure (off=%u): %Rrc\n", off, rc));
1018 else
1019 LogRel(("FTSync: Invalid password (off=%u)\n", off));
1020 ftmR3TcpWriteNACK(pVM, VERR_AUTHENTICATION_FAILURE);
1021 return VINF_SUCCESS;
1022 }
1023 off++;
1024 }
1025 }
1026 rc = ftmR3TcpWriteACK(pVM);
1027 if (RT_FAILURE(rc))
1028 return VINF_SUCCESS;
1029
1030 /** @todo verify VM config. */
1031
1032 /*
1033 * Stop the server.
1034 *
1035 * Note! After this point we must return VERR_TCP_SERVER_STOP, while prior
1036 * to it we must not return that value!
1037 */
1038 RTTcpServerShutdown(pVM->ftm.s.standby.hServer);
1039
1040 /*
1041 * Command processing loop.
1042 */
1043 //bool fDone = false;
1044 for (;;)
1045 {
1046 bool fFullSync = false;
1047 char szCmd[128];
1048
1049 rc = ftmR3TcpReadLine(pVM, szCmd, sizeof(szCmd));
1050 if (RT_FAILURE(rc))
1051 break;
1052
1053 pVM->ftm.s.standby.u64LastHeartbeat = RTTimeMilliTS();
1054 if (!strcmp(szCmd, "mem-sync"))
1055 {
1056 rc = ftmR3TcpWriteACK(pVM);
1057 AssertRC(rc);
1058 if (RT_FAILURE(rc))
1059 continue;
1060
1061 rc = ftmR3SyncMem(pVM);
1062 AssertRC(rc);
1063
1064 rc = ftmR3TcpWriteACK(pVM);
1065 AssertRC(rc);
1066 }
1067 else
1068 if ( !strcmp(szCmd, "checkpoint")
1069 || !strcmp(szCmd, "full-sync")
1070 || (fFullSync = true)) /* intended assignment */
1071 {
1072 rc = ftmR3TcpWriteACK(pVM);
1073 AssertRC(rc);
1074 if (RT_FAILURE(rc))
1075 continue;
1076
1077 /* Flush all pending memory updates. */
1078 if (pVM->ftm.s.standby.pPhysPageTree)
1079 {
1080 RTAvlGCPhysDestroy(&pVM->ftm.s.standby.pPhysPageTree, ftmR3PageTreeDestroyCallback, pVM);
1081 pVM->ftm.s.standby.pPhysPageTree = NULL;
1082 }
1083
1084 RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */
1085
1086 /* Reset the sync state. */
1087 pVM->ftm.s.syncstate.uOffStream = 0;
1088 pVM->ftm.s.syncstate.cbReadBlock = 0;
1089 pVM->ftm.s.syncstate.fStopReading = false;
1090 pVM->ftm.s.syncstate.fIOError = false;
1091 pVM->ftm.s.syncstate.fEndOfStream = false;
1092
1093 pVM->ftm.s.fDeltaLoadSaveActive = (fFullSync == false);
1094 rc = VMR3LoadFromStreamFT(pVM->pUVM, &g_ftmR3TcpOps, pVM);
1095 pVM->ftm.s.fDeltaLoadSaveActive = false;
1096 RTSocketRelease(pVM->ftm.s.hSocket);
1097 AssertRC(rc);
1098 if (RT_FAILURE(rc))
1099 {
1100 LogRel(("FTSync: VMR3LoadFromStream -> %Rrc\n", rc));
1101 ftmR3TcpWriteNACK(pVM, rc);
1102 continue;
1103 }
1104
1105 /* The EOS might not have been read, make sure it is. */
1106 pVM->ftm.s.syncstate.fStopReading = false;
1107 size_t cbRead;
1108 rc = ftmR3TcpOpRead(pVM, pVM->ftm.s.syncstate.uOffStream, szCmd, 1, &cbRead);
1109 if (rc != VERR_EOF)
1110 {
1111 LogRel(("FTSync: Draining teleporterTcpOpRead -> %Rrc\n", rc));
1112 ftmR3TcpWriteNACK(pVM, rc);
1113 continue;
1114 }
1115
1116 rc = ftmR3TcpWriteACK(pVM);
1117 AssertRC(rc);
1118 }
1119 }
1120 LogFlowFunc(("returns mRc=%Rrc\n", rc));
1121 return VERR_TCP_SERVER_STOP;
1122}
1123
1124/**
1125 * Powers on the fault tolerant virtual machine.
1126 *
1127 * @returns VBox status code.
1128 *
1129 * @param pUVM The user mode VM handle.
1130 * @param fMaster FT master or standby
1131 * @param uInterval FT sync interval
1132 * @param pszAddress Standby VM address
1133 * @param uPort Standby VM port
1134 * @param pszPassword FT password (NULL for none)
1135 *
1136 * @thread Any thread.
1137 * @vmstate Created
1138 * @vmstateto PoweringOn+Running (master), PoweringOn+Running_FT (standby)
1139 */
1140VMMR3DECL(int) FTMR3PowerOn(PUVM pUVM, bool fMaster, unsigned uInterval,
1141 const char *pszAddress, unsigned uPort, const char *pszPassword)
1142{
1143 UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE);
1144 PVM pVM = pUVM->pVM;
1145 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
1146
1147 VMSTATE enmVMState = VMR3GetState(pVM);
1148 AssertMsgReturn(enmVMState == VMSTATE_CREATED,
1149 ("%s\n", VMR3GetStateName(enmVMState)),
1150 VERR_INTERNAL_ERROR_4);
1151 AssertReturn(pszAddress, VERR_INVALID_PARAMETER);
1152
1153 if (pVM->ftm.s.uInterval)
1154 pVM->ftm.s.uInterval = uInterval;
1155 else
1156 pVM->ftm.s.uInterval = 50; /* standard sync interval of 50ms */
1157
1158 pVM->ftm.s.uPort = uPort;
1159 pVM->ftm.s.pszAddress = RTStrDup(pszAddress);
1160 if (pszPassword)
1161 pVM->ftm.s.pszPassword = RTStrDup(pszPassword);
1162
1163 int rc = RTSemEventCreate(&pVM->ftm.s.hShutdownEvent);
1164 if (RT_FAILURE(rc))
1165 return rc;
1166
1167 if (fMaster)
1168 {
1169 rc = RTThreadCreate(NULL, ftmR3MasterThread, pVM,
1170 0, RTTHREADTYPE_IO /* higher than normal priority */, 0, "ftmMaster");
1171 if (RT_FAILURE(rc))
1172 return rc;
1173
1174 pVM->fFaultTolerantMaster = true;
1175 if (PGMIsUsingLargePages(pVM))
1176 {
1177 /* Must disable large page usage as 2 MB pages are too big to write monitor. */
1178 LogRel(("FTSync: disabling large page usage.\n"));
1179 PGMSetLargePageUsage(pVM, false);
1180 }
1181 /** @todo might need to disable page fusion as well */
1182
1183 return VMR3PowerOn(pVM->pUVM);
1184 }
1185
1186
1187 /* standby */
1188 rc = RTThreadCreate(NULL, ftmR3StandbyThread, pVM,
1189 0, RTTHREADTYPE_DEFAULT, 0, "ftmStandby");
1190 if (RT_FAILURE(rc))
1191 return rc;
1192
1193 rc = RTTcpServerCreateEx(pszAddress, uPort, &pVM->ftm.s.standby.hServer);
1194 if (RT_FAILURE(rc))
1195 return rc;
1196 pVM->ftm.s.fIsStandbyNode = true;
1197
1198 rc = RTTcpServerListen(pVM->ftm.s.standby.hServer, ftmR3StandbyServeConnection, pVM);
1199 /** @todo deal with the exit code to check if we should activate this standby VM. */
1200 if (pVM->ftm.s.fActivateStandby)
1201 {
1202 /** @todo fallover. */
1203 }
1204
1205 if (pVM->ftm.s.standby.hServer)
1206 {
1207 RTTcpServerDestroy(pVM->ftm.s.standby.hServer);
1208 pVM->ftm.s.standby.hServer = NULL;
1209 }
1210 if (rc == VERR_TCP_SERVER_SHUTDOWN)
1211 rc = VINF_SUCCESS; /* ignore this error; the standby process was cancelled. */
1212 return rc;
1213}
1214
1215/**
1216 * Powers off the fault tolerant virtual machine (standby).
1217 *
1218 * @returns VBox status code.
1219 *
1220 * @param pUVM The user mode VM handle.
1221 */
1222VMMR3DECL(int) FTMR3CancelStandby(PUVM pUVM)
1223{
1224 UVM_ASSERT_VALID_EXT_RETURN(pUVM, VERR_INVALID_VM_HANDLE);
1225 PVM pVM = pUVM->pVM;
1226 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
1227 AssertReturn(!pVM->fFaultTolerantMaster, VERR_NOT_SUPPORTED);
1228 Assert(pVM->ftm.s.standby.hServer);
1229
1230 return RTTcpServerShutdown(pVM->ftm.s.standby.hServer);
1231}
1232
1233/**
1234 * Rendezvous callback used by FTMR3SetCheckpoint
1235 * Sync state + changed memory with the standby node.
1236 *
1237 * This is only called on one of the EMTs while the other ones are waiting for
1238 * it to complete this function.
1239 *
1240 * @returns VINF_SUCCESS (VBox strict status code).
1241 * @param pVM Pointer to the VM.
1242 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
1243 * @param pvUser Not used.
1244 */
1245static DECLCALLBACK(VBOXSTRICTRC) ftmR3SetCheckpointRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
1246{
1247 int rc = VINF_SUCCESS;
1248 bool fSuspended = false;
1249 NOREF(pVCpu);
1250 NOREF(pvUser);
1251
1252 /* We don't call VMR3Suspend here to avoid the overhead of state changes and notifications. This
1253 * is only a short suspend.
1254 */
1255 STAM_PROFILE_START(&pVM->ftm.s.StatCheckpointPause, a);
1256 PDMR3Suspend(pVM);
1257
1258 /* Hack alert: as EM is responsible for dealing with the suspend state. We must do this here ourselves, but only for this EMT.*/
1259 EMR3NotifySuspend(pVM);
1260 STAM_PROFILE_STOP(&pVM->ftm.s.StatCheckpointPause, a);
1261
1262 STAM_REL_COUNTER_INC(&pVM->ftm.s.StatDeltaVM);
1263
1264 RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */
1265
1266 /* Reset the sync state. */
1267 pVM->ftm.s.syncstate.uOffStream = 0;
1268 pVM->ftm.s.syncstate.cbReadBlock = 0;
1269 pVM->ftm.s.syncstate.fStopReading = false;
1270 pVM->ftm.s.syncstate.fIOError = false;
1271 pVM->ftm.s.syncstate.fEndOfStream = false;
1272
1273 rc = ftmR3TcpSubmitCommand(pVM, "checkpoint");
1274 AssertRC(rc);
1275
1276 pVM->ftm.s.fDeltaLoadSaveActive = true;
1277 rc = VMR3SaveFT(pVM->pUVM, &g_ftmR3TcpOps, pVM, &fSuspended, true /* fSkipStateChanges */);
1278 pVM->ftm.s.fDeltaLoadSaveActive = false;
1279 AssertRC(rc);
1280
1281 rc = ftmR3TcpReadACK(pVM, "checkpoint-complete");
1282 AssertRC(rc);
1283
1284 RTSocketRelease(pVM->ftm.s.hSocket);
1285
1286 /* Write protect all memory. */
1287 rc = PGMR3PhysWriteProtectRAM(pVM);
1288 AssertRC(rc);
1289
1290 /* We don't call VMR3Resume here to avoid the overhead of state changes and notifications. This
1291 * is only a short suspend.
1292 */
1293 STAM_PROFILE_START(&pVM->ftm.s.StatCheckpointResume, b);
1294 PGMR3ResetNoMorePhysWritesFlag(pVM);
1295 PDMR3Resume(pVM);
1296
1297 /* Hack alert as EM is responsible for dealing with the suspend state. We must do this here ourselves, but only for this EMT.*/
1298 EMR3NotifyResume(pVM);
1299 STAM_PROFILE_STOP(&pVM->ftm.s.StatCheckpointResume, b);
1300
1301 return rc;
1302}
1303
1304/**
1305 * Performs a full sync to the standby node
1306 *
1307 * @returns VBox status code.
1308 *
1309 * @param pVM Pointer to the VM.
1310 * @param enmCheckpoint Checkpoint type
1311 */
1312VMMR3_INT_DECL(int) FTMR3SetCheckpoint(PVM pVM, FTMCHECKPOINTTYPE enmCheckpoint)
1313{
1314 int rc;
1315
1316 if (!pVM->fFaultTolerantMaster)
1317 return VINF_SUCCESS;
1318
1319 switch (enmCheckpoint)
1320 {
1321 case FTMCHECKPOINTTYPE_NETWORK:
1322 STAM_REL_COUNTER_INC(&pVM->ftm.s.StatCheckpointNetwork);
1323 break;
1324
1325 case FTMCHECKPOINTTYPE_STORAGE:
1326 STAM_REL_COUNTER_INC(&pVM->ftm.s.StatCheckpointStorage);
1327 break;
1328
1329 default:
1330 AssertMsgFailedReturn(("%d\n", enmCheckpoint), VERR_INVALID_PARAMETER);
1331 }
1332
1333 pVM->ftm.s.fCheckpointingActive = true;
1334 if (VM_IS_EMT(pVM))
1335 {
1336 PVMCPU pVCpu = VMMGetCpu(pVM);
1337
1338 /* We must take special care here as the memory sync is competing with us and requires a responsive EMT. */
1339 while ((rc = PDMCritSectTryEnter(&pVM->ftm.s.CritSect)) == VERR_SEM_BUSY)
1340 {
1341 if (VM_FF_IS_PENDING(pVM, VM_FF_EMT_RENDEZVOUS))
1342 {
1343 rc = VMMR3EmtRendezvousFF(pVM, pVCpu);
1344 AssertRC(rc);
1345 }
1346
1347 if (VM_FF_IS_PENDING(pVM, VM_FF_REQUEST))
1348 {
1349 rc = VMR3ReqProcessU(pVM->pUVM, VMCPUID_ANY, true /*fPriorityOnly*/);
1350 AssertRC(rc);
1351 }
1352 }
1353 }
1354 else
1355 rc = PDMCritSectEnter(&pVM->ftm.s.CritSect, VERR_SEM_BUSY);
1356
1357 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
1358
1359 STAM_PROFILE_START(&pVM->ftm.s.StatCheckpoint, a);
1360
1361 rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, ftmR3SetCheckpointRendezvous, NULL);
1362
1363 STAM_PROFILE_STOP(&pVM->ftm.s.StatCheckpoint, a);
1364
1365 PDMCritSectLeave(&pVM->ftm.s.CritSect);
1366 pVM->ftm.s.fCheckpointingActive = false;
1367
1368 return rc;
1369}
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
Contact | Privacy policy | Terms of Use