VirtualBox

source: vbox/trunk/src/VBox/VMM/FTM.cpp@ 32171

Last change on this file since 32171 was 32171, checked in by vboxsync, 15 years ago

FT updates

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 34.9 KB
Line 
1/* $Id: FTM.cpp 32171 2010-09-01 09:54:45Z vboxsync $ */
2/** @file
3 * FTM - Fault Tolerance Manager
4 */
5
6/*
7 * Copyright (C) 2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_FTM
23#include "FTMInternal.h"
24#include <VBox/vm.h>
25#include <VBox/vmm.h>
26#include <VBox/err.h>
27#include <VBox/param.h>
28#include <VBox/ssm.h>
29#include <VBox/log.h>
30#include <VBox/pgm.h>
31
32#include <iprt/assert.h>
33#include <iprt/thread.h>
34#include <iprt/string.h>
35#include <iprt/mem.h>
36#include <iprt/tcp.h>
37#include <iprt/socket.h>
38#include <iprt/semaphore.h>
39#include <iprt/asm.h>
40
41#include <include/internal/vm.h>
42#include <include/internal/em.h>
43
44/*******************************************************************************
45 * Structures and Typedefs *
46 *******************************************************************************/
47
48/**
49 * Block header framing the saved-state (SSM) data on the TCP connection.
50 *
51 * SSM itself cannot tell where its stream ends on a socket, so every chunk of
52 * state data is preceded by one of these headers.
53 */
54typedef struct FTMTCPHDR
55{
56 /** Magic value, must be FTMTCPHDR_MAGIC. */
57 uint32_t u32Magic;
58 /** The size of the data block following this header.
59 * 0 indicates the end of the stream, while UINT32_MAX indicates
60 * cancellation. */
61 uint32_t cb;
62} FTMTCPHDR;
63/** Magic value for FTMTCPHDR::u32Magic. (Egberto Gismonti Amin) */
64#define FTMTCPHDR_MAGIC UINT32_C(0x19471205)
65/** The max block size; the writer splits larger buffers and the reader rejects larger headers. */
66#define FTMTCPHDR_MAX_SIZE UINT32_C(0x00fffff8)
67
68/**
69 * Memory sync header.
70 *
71 * Precedes each range of dirty guest pages the master sends to the standby
72 * node during a "mem-sync"; a header with cb set to 0 ends the sync.
73 */
74typedef struct FTMTCPHDRMEM
75{
76 /** Magic value; the senders in this file store FTMTCPHDR_MAGIC here. */
77 uint32_t u32Magic;
78 /** Size (Uncompressed) of the pages following the header. */
79 uint32_t cbPageRange;
80 /** GC Physical address of the page(s) to sync. */
81 RTGCPHYS GCPhys;
82 /** The size of the data block following this header.
83 * 0 indicates the end of the memory sync. Compression is not implemented
84 * yet, so the sender sets this equal to cbPageRange. */
85 uint32_t cb;
86} FTMTCPHDRMEM;
87
88/*******************************************************************************
89* Global Variables *
90*******************************************************************************/
/** Greeting the standby node writes right after accepting a connection; the
 * master compares what it reads against this string before sending the password. */
91static const char g_szWelcome[] = "VirtualBox-Fault-Tolerance-Sync-1.0\n";
92
93/**
94 * Initializes the FTM.
95 *
96 * @returns VBox status code.
97 * @param pVM The VM to operate on.
98 */
99VMMR3DECL(int) FTMR3Init(PVM pVM)
100{
101 /*
102 * Assert alignment and sizes.
103 */
104 AssertCompile(sizeof(pVM->ftm.s) <= sizeof(pVM->ftm.padding));
105 AssertCompileMemberAlignment(FTM, CritSect, sizeof(uintptr_t));
106
107 /** @todo saved state for master nodes! */
108 pVM->ftm.s.pszAddress = NULL;
109 pVM->ftm.s.pszPassword = NULL;
110 pVM->fFaultTolerantMaster = false;
111 pVM->ftm.s.fIsStandbyNode = false;
112 pVM->ftm.s.standby.hServer = NIL_RTTCPSERVER;
113 pVM->ftm.s.master.hShutdownEvent = NIL_RTSEMEVENT;
114 pVM->ftm.s.hSocket = NIL_RTSOCKET;
115
116 /*
117 * Initialize the PGM critical section.
118 */
119 int rc = PDMR3CritSectInit(pVM, &pVM->ftm.s.CritSect, RT_SRC_POS, "FTM");
120 AssertRCReturn(rc, rc);
121
122 /*
123 * Register statistics.
124 */
125 STAM_REL_REG(pVM, &pVM->ftm.s.StatReceivedMem, STAMTYPE_COUNTER, "/FT/Received/Mem", STAMUNIT_BYTES, "The amount of memory pages that was received.");
126 STAM_REL_REG(pVM, &pVM->ftm.s.StatReceivedState, STAMTYPE_COUNTER, "/FT/Received/State", STAMUNIT_BYTES, "The amount of state information that was received.");
127 STAM_REL_REG(pVM, &pVM->ftm.s.StatSentMem, STAMTYPE_COUNTER, "/FT/Sent/Mem", STAMUNIT_BYTES, "The amount of memory pages that was sent.");
128 STAM_REL_REG(pVM, &pVM->ftm.s.StatSentState, STAMTYPE_COUNTER, "/FT/Sent/State", STAMUNIT_BYTES, "The amount of state information that was sent.");
129 STAM_REL_REG(pVM, &pVM->ftm.s.StatDeltaVM, STAMTYPE_COUNTER, "/FT/Sync/DeltaVM", STAMUNIT_OCCURENCES, "Number of delta vm syncs.");
130 STAM_REL_REG(pVM, &pVM->ftm.s.StatFullSync, STAMTYPE_COUNTER, "/FT/Sync/Full", STAMUNIT_OCCURENCES, "Number of full vm syncs.");
131 STAM_REL_REG(pVM, &pVM->ftm.s.StatDeltaMem, STAMTYPE_COUNTER, "/FT/Sync/DeltaMem", STAMUNIT_OCCURENCES, "Number of delta mem syncs.");
132 STAM_REL_REG(pVM, &pVM->ftm.s.StatCheckpointStorage, STAMTYPE_COUNTER, "/FT/Checkpoint/Storage", STAMUNIT_OCCURENCES, "Number of storage checkpoints.");
133 STAM_REL_REG(pVM, &pVM->ftm.s.StatCheckpointNetwork, STAMTYPE_COUNTER, "/FT/Checkpoint/Network", STAMUNIT_OCCURENCES, "Number of network checkpoints.");
134
135 return VINF_SUCCESS;
136}
137
138/**
139 * Terminates the FTM.
140 *
141 * Termination means cleaning up and freeing all resources,
142 * the VM itself is at this point powered off or suspended.
143 *
144 * @returns VBox status code.
145 * @param pVM The VM to operate on.
146 */
147VMMR3DECL(int) FTMR3Term(PVM pVM)
148{
149 if (pVM->ftm.s.master.hShutdownEvent != NIL_RTSEMEVENT)
150 {
151 RTSemEventDestroy(pVM->ftm.s.master.hShutdownEvent);
152 pVM->ftm.s.master.hShutdownEvent = NIL_RTSEMEVENT;
153 }
154 if (pVM->ftm.s.hSocket != NIL_RTSOCKET)
155 {
156 RTTcpClientClose(pVM->ftm.s.hSocket);
157 pVM->ftm.s.hSocket = NIL_RTSOCKET;
158 }
159 if (pVM->ftm.s.standby.hServer)
160 {
161 RTTcpServerDestroy(pVM->ftm.s.standby.hServer);
162 pVM->ftm.s.standby.hServer = NULL;
163 }
164 if (pVM->ftm.s.pszAddress)
165 RTMemFree(pVM->ftm.s.pszAddress);
166 if (pVM->ftm.s.pszPassword)
167 RTMemFree(pVM->ftm.s.pszPassword);
168
169 pVM->ftm.s.pszAddress = NULL;
170 pVM->ftm.s.pszPassword = NULL;
171
172 PDMR3CritSectDelete(&pVM->ftm.s.CritSect);
173 return VINF_SUCCESS;
174}
175
176
177static int ftmR3TcpWriteACK(PVM pVM)
178{
179 int rc = RTTcpWrite(pVM->ftm.s.hSocket, "ACK\n", sizeof("ACK\n") - 1);
180 if (RT_FAILURE(rc))
181 {
182 LogRel(("FTSync: RTTcpWrite(,ACK,) -> %Rrc\n", rc));
183 }
184 return rc;
185}
186
187
188static int ftmR3TcpWriteNACK(PVM pVM, int32_t rc2, const char *pszMsgText = NULL)
189{
190 char szMsg[256];
191 size_t cch;
192 if (pszMsgText && *pszMsgText)
193 {
194 cch = RTStrPrintf(szMsg, sizeof(szMsg), "NACK=%d;%s\n", rc2, pszMsgText);
195 for (size_t off = 6; off + 1 < cch; off++)
196 if (szMsg[off] == '\n')
197 szMsg[off] = '\r';
198 }
199 else
200 cch = RTStrPrintf(szMsg, sizeof(szMsg), "NACK=%d\n", rc2);
201 int rc = RTTcpWrite(pVM->ftm.s.hSocket, szMsg, cch);
202 if (RT_FAILURE(rc))
203 LogRel(("FTSync: RTTcpWrite(,%s,%zu) -> %Rrc\n", szMsg, cch, rc));
204 return rc;
205}
206
207/**
208 * Reads a string from the socket.
209 *
210 * @returns VBox status code.
211 *
212 * @param pState The teleporter state structure.
213 * @param pszBuf The output buffer.
214 * @param cchBuf The size of the output buffer.
215 *
216 */
217static int ftmR3TcpReadLine(PVM pVM, char *pszBuf, size_t cchBuf)
218{
219 char *pszStart = pszBuf;
220 RTSOCKET Sock = pVM->ftm.s.hSocket;
221
222 AssertReturn(cchBuf > 1, VERR_INTERNAL_ERROR);
223 *pszBuf = '\0';
224
225 /* dead simple approach. */
226 for (;;)
227 {
228 char ch;
229 int rc = RTTcpRead(Sock, &ch, sizeof(ch), NULL);
230 if (RT_FAILURE(rc))
231 {
232 LogRel(("FTSync: RTTcpRead -> %Rrc while reading string ('%s')\n", rc, pszStart));
233 return rc;
234 }
235 if ( ch == '\n'
236 || ch == '\0')
237 return VINF_SUCCESS;
238 if (cchBuf <= 1)
239 {
240 LogRel(("FTSync: String buffer overflow: '%s'\n", pszStart));
241 return VERR_BUFFER_OVERFLOW;
242 }
243 *pszBuf++ = ch;
244 *pszBuf = '\0';
245 cchBuf--;
246 }
247}
248
249/**
250 * Reads an ACK or NACK.
251 *
252 * @returns VBox status code.
253 * @param pVM The VM to operate on.
254 * @param pszWhich Which ACK is this this?
255 * @param pszNAckMsg Optional NACK message.
256 */
257static int ftmR3TcpReadACK(PVM pVM, const char *pszWhich, const char *pszNAckMsg = NULL)
258{
259 char szMsg[256];
260 int rc = ftmR3TcpReadLine(pVM, szMsg, sizeof(szMsg));
261 if (RT_FAILURE(rc))
262 return rc;
263
264 if (!strcmp(szMsg, "ACK"))
265 return VINF_SUCCESS;
266
267 if (!strncmp(szMsg, "NACK=", sizeof("NACK=") - 1))
268 {
269 char *pszMsgText = strchr(szMsg, ';');
270 if (pszMsgText)
271 *pszMsgText++ = '\0';
272
273 int32_t vrc2;
274 rc = RTStrToInt32Full(&szMsg[sizeof("NACK=") - 1], 10, &vrc2);
275 if (rc == VINF_SUCCESS)
276 {
277 /*
278 * Well formed NACK, transform it into an error.
279 */
280 if (pszNAckMsg)
281 {
282 LogRel(("FTSync: %s: NACK=%Rrc (%d)\n", pszWhich, vrc2, vrc2));
283 return VERR_INTERNAL_ERROR;
284 }
285
286 if (pszMsgText)
287 {
288 pszMsgText = RTStrStrip(pszMsgText);
289 for (size_t off = 0; pszMsgText[off]; off++)
290 if (pszMsgText[off] == '\r')
291 pszMsgText[off] = '\n';
292
293 LogRel(("FTSync: %s: NACK=%Rrc (%d) - '%s'\n", pszWhich, vrc2, vrc2, pszMsgText));
294 }
295 return VERR_INTERNAL_ERROR_2;
296 }
297
298 if (pszMsgText)
299 pszMsgText[-1] = ';';
300 }
301 return VERR_INTERNAL_ERROR_3;
302}
303
304/**
305 * Submitts a command to the destination and waits for the ACK.
306 *
307 * @returns VBox status code.
308 *
309 * @param pVM The VM to operate on.
310 * @param pszCommand The command.
311 * @param fWaitForAck Whether to wait for the ACK.
312 */
313static int ftmR3TcpSubmitCommand(PVM pVM, const char *pszCommand, bool fWaitForAck = true)
314{
315 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, pszCommand, strlen(pszCommand), "\n", sizeof("\n") - 1);
316 if (RT_FAILURE(rc))
317 return rc;
318 if (!fWaitForAck)
319 return VINF_SUCCESS;
320 return ftmR3TcpReadACK(pVM, pszCommand);
321}
322
323/**
324 * @copydoc SSMSTRMOPS::pfnWrite
325 */
326static DECLCALLBACK(int) ftmR3TcpOpWrite(void *pvUser, uint64_t offStream, const void *pvBuf, size_t cbToWrite)
327{
328 PVM pVM = (PVM)pvUser;
329
330 AssertReturn(cbToWrite > 0, VINF_SUCCESS);
331 AssertReturn(cbToWrite < UINT32_MAX, VERR_OUT_OF_RANGE);
332 AssertReturn(pVM->fFaultTolerantMaster, VERR_INVALID_HANDLE);
333
334 for (;;)
335 {
336 FTMTCPHDR Hdr;
337 Hdr.u32Magic = FTMTCPHDR_MAGIC;
338 Hdr.cb = RT_MIN((uint32_t)cbToWrite, FTMTCPHDR_MAX_SIZE);
339 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, &Hdr, sizeof(Hdr), pvBuf, (size_t)Hdr.cb);
340 if (RT_FAILURE(rc))
341 {
342 LogRel(("FTSync/TCP: Write error: %Rrc (cb=%#x)\n", rc, Hdr.cb));
343 return rc;
344 }
345 pVM->ftm.s.StatSentState.c += Hdr.cb + sizeof(Hdr);
346 pVM->ftm.s.syncstate.uOffStream += Hdr.cb;
347 if (Hdr.cb == cbToWrite)
348 return VINF_SUCCESS;
349
350 /* advance */
351 cbToWrite -= Hdr.cb;
352 pvBuf = (uint8_t const *)pvBuf + Hdr.cb;
353 }
354}
355
356
357/**
358 * Selects and poll for close condition.
359 *
360 * We can use a relatively high poll timeout here since it's only used to get
361 * us out of error paths. In the normal cause of events, we'll get a
362 * end-of-stream header.
363 *
364 * @returns VBox status code.
365 *
366 * @param pState The teleporter state data.
367 */
368static int ftmR3TcpReadSelect(PVM pVM)
369{
370 int rc;
371 do
372 {
373 rc = RTTcpSelectOne(pVM->ftm.s.hSocket, 1000);
374 if (RT_FAILURE(rc) && rc != VERR_TIMEOUT)
375 {
376 pVM->ftm.s.syncstate.fIOError = true;
377 LogRel(("FTSync/TCP: Header select error: %Rrc\n", rc));
378 break;
379 }
380 if (pVM->ftm.s.syncstate.fStopReading)
381 {
382 rc = VERR_EOF;
383 break;
384 }
385 } while (rc == VERR_TIMEOUT);
386 return rc;
387}
388
389
390/**
391 * @copydoc SSMSTRMOPS::pfnRead
392 */
393static DECLCALLBACK(int) ftmR3TcpOpRead(void *pvUser, uint64_t offStream, void *pvBuf, size_t cbToRead, size_t *pcbRead)
394{
395 PVM pVM = (PVM)pvUser;
396 AssertReturn(!pVM->fFaultTolerantMaster, VERR_INVALID_HANDLE);
397
398 for (;;)
399 {
400 int rc;
401
402 /*
403 * Check for various conditions and may have been signalled.
404 */
405 if (pVM->ftm.s.syncstate.fEndOfStream)
406 return VERR_EOF;
407 if (pVM->ftm.s.syncstate.fStopReading)
408 return VERR_EOF;
409 if (pVM->ftm.s.syncstate.fIOError)
410 return VERR_IO_GEN_FAILURE;
411
412 /*
413 * If there is no more data in the current block, read the next
414 * block header.
415 */
416 if (!pVM->ftm.s.syncstate.cbReadBlock)
417 {
418 rc = ftmR3TcpReadSelect(pVM);
419 if (RT_FAILURE(rc))
420 return rc;
421 FTMTCPHDR Hdr;
422 rc = RTTcpRead(pVM->ftm.s.hSocket, &Hdr, sizeof(Hdr), NULL);
423 if (RT_FAILURE(rc))
424 {
425 pVM->ftm.s.syncstate.fIOError = true;
426 LogRel(("FTSync/TCP: Header read error: %Rrc\n", rc));
427 return rc;
428 }
429 pVM->ftm.s.StatReceivedState.c += sizeof(Hdr);
430
431 if (RT_UNLIKELY( Hdr.u32Magic != FTMTCPHDR_MAGIC
432 || Hdr.cb > FTMTCPHDR_MAX_SIZE
433 || Hdr.cb == 0))
434 {
435 if ( Hdr.u32Magic == FTMTCPHDR_MAGIC
436 && ( Hdr.cb == 0
437 || Hdr.cb == UINT32_MAX)
438 )
439 {
440 pVM->ftm.s.syncstate.fEndOfStream = true;
441 pVM->ftm.s.syncstate.cbReadBlock = 0;
442 return Hdr.cb ? VERR_SSM_CANCELLED : VERR_EOF;
443 }
444 pVM->ftm.s.syncstate.fIOError = true;
445 LogRel(("FTSync/TCP: Invalid block: u32Magic=%#x cb=%#x\n", Hdr.u32Magic, Hdr.cb));
446 return VERR_IO_GEN_FAILURE;
447 }
448
449 pVM->ftm.s.syncstate.cbReadBlock = Hdr.cb;
450 if (pVM->ftm.s.syncstate.fStopReading)
451 return VERR_EOF;
452 }
453
454 /*
455 * Read more data.
456 */
457 rc = ftmR3TcpReadSelect(pVM);
458 if (RT_FAILURE(rc))
459 return rc;
460
461 uint32_t cb = (uint32_t)RT_MIN(pVM->ftm.s.syncstate.cbReadBlock, cbToRead);
462 rc = RTTcpRead(pVM->ftm.s.hSocket, pvBuf, cb, pcbRead);
463 if (RT_FAILURE(rc))
464 {
465 pVM->ftm.s.syncstate.fIOError = true;
466 LogRel(("FTSync/TCP: Data read error: %Rrc (cb=%#x)\n", rc, cb));
467 return rc;
468 }
469 if (pcbRead)
470 {
471 cb = (uint32_t)*pcbRead;
472 pVM->ftm.s.StatReceivedState.c += cb;
473 pVM->ftm.s.syncstate.uOffStream += cb;
474 pVM->ftm.s.syncstate.cbReadBlock -= cb;
475 return VINF_SUCCESS;
476 }
477 pVM->ftm.s.StatReceivedState.c += cb;
478 pVM->ftm.s.syncstate.uOffStream += cb;
479 pVM->ftm.s.syncstate.cbReadBlock -= cb;
480 if (cbToRead == cb)
481 return VINF_SUCCESS;
482
483 /* Advance to the next block. */
484 cbToRead -= cb;
485 pvBuf = (uint8_t *)pvBuf + cb;
486 }
487}
488
489
490/**
491 * @copydoc SSMSTRMOPS::pfnSeek
492 */
493static DECLCALLBACK(int) ftmR3TcpOpSeek(void *pvUser, int64_t offSeek, unsigned uMethod, uint64_t *poffActual)
494{
495 return VERR_NOT_SUPPORTED;
496}
497
498
499/**
500 * @copydoc SSMSTRMOPS::pfnTell
501 */
502static DECLCALLBACK(uint64_t) ftmR3TcpOpTell(void *pvUser)
503{
504 PVM pVM = (PVM)pvUser;
505 return pVM->ftm.s.syncstate.uOffStream;
506}
507
508
509/**
510 * @copydoc SSMSTRMOPS::pfnSize
511 */
512static DECLCALLBACK(int) ftmR3TcpOpSize(void *pvUser, uint64_t *pcb)
513{
514 return VERR_NOT_SUPPORTED;
515}
516
517
518/**
519 * @copydoc SSMSTRMOPS::pfnIsOk
520 */
521static DECLCALLBACK(int) ftmR3TcpOpIsOk(void *pvUser)
522{
523 PVM pVM = (PVM)pvUser;
524
525 if (pVM->fFaultTolerantMaster)
526 {
527 /* Poll for incoming NACKs and errors from the other side */
528 int rc = RTTcpSelectOne(pVM->ftm.s.hSocket, 0);
529 if (rc != VERR_TIMEOUT)
530 {
531 if (RT_SUCCESS(rc))
532 {
533 LogRel(("FTSync/TCP: Incoming data detect by IsOk, assuming it is a cancellation NACK.\n"));
534 rc = VERR_SSM_CANCELLED;
535 }
536 else
537 LogRel(("FTSync/TCP: RTTcpSelectOne -> %Rrc (IsOk).\n", rc));
538 return rc;
539 }
540 }
541
542 return VINF_SUCCESS;
543}
544
545
546/**
547 * @copydoc SSMSTRMOPS::pfnClose
548 */
549static DECLCALLBACK(int) ftmR3TcpOpClose(void *pvUser, bool fCanceled)
550{
551 PVM pVM = (PVM)pvUser;
552
553 if (pVM->fFaultTolerantMaster)
554 {
555 FTMTCPHDR EofHdr;
556 EofHdr.u32Magic = FTMTCPHDR_MAGIC;
557 EofHdr.cb = fCanceled ? UINT32_MAX : 0;
558 int rc = RTTcpWrite(pVM->ftm.s.hSocket, &EofHdr, sizeof(EofHdr));
559 if (RT_FAILURE(rc))
560 {
561 LogRel(("FTSync/TCP: EOF Header write error: %Rrc\n", rc));
562 return rc;
563 }
564 }
565 else
566 {
567 ASMAtomicWriteBool(&pVM->ftm.s.syncstate.fStopReading, true);
568 }
569
570 return VINF_SUCCESS;
571}
572
573
574/**
575 * Method table for a TCP based stream.
576 *
577 * Handed to VMR3SaveFT on the master and to VMR3LoadFromStream on the standby
578 * node, with the VM handle as the user argument.
579 */
577static SSMSTRMOPS const g_ftmR3TcpOps =
578{
579 SSMSTRMOPS_VERSION, /* version constant, repeated as the last entry */
580 ftmR3TcpOpWrite,
581 ftmR3TcpOpRead,
582 ftmR3TcpOpSeek, /* always VERR_NOT_SUPPORTED */
583 ftmR3TcpOpTell,
584 ftmR3TcpOpSize, /* always VERR_NOT_SUPPORTED */
585 ftmR3TcpOpIsOk,
586 ftmR3TcpOpClose,
587 SSMSTRMOPS_VERSION
588};
589
590/**
591 * VMR3ReqCallWait callback
592 *
593 * @param pVM The VM handle.
594 *
595 */
596static DECLCALLBACK(void) ftmR3WriteProtectMemory(PVM pVM)
597{
598 int rc = PGMR3PhysWriteProtectRAM(pVM);
599 AssertRC(rc);
600}
601
602/**
603 * Sync the VM state partially or fully
604 *
605 * @returns VBox status code.
606 * @param pVM The VM handle.
607 * @param enmState Which state to sync
608 */
609static int ftmR3PerformSync(PVM pVM, FTMSYNCSTATE enmState)
610{
611 int rc;
612 bool fFullSync = false;
613
614 if (enmState != FTMSYNCSTATE_DELTA_MEMORY)
615 {
616 rc = VMR3Suspend(pVM);
617 AssertRCReturn(rc, rc);
618 /** Hack alert as EM is responsible for dealing with the suspend state. We must do this here ourselves, but only for this EMT.*/
619 if (VM_IS_EMT(pVM))
620 EMR3NotifySuspend(pVM);
621 }
622
623 switch (enmState)
624 {
625 case FTMSYNCSTATE_FULL:
626 fFullSync = true;
627 /* no break */
628 case FTMSYNCSTATE_DELTA_VM:
629 {
630 bool fSuspended = false;
631
632 STAM_REL_COUNTER_INC((fFullSync) ? &pVM->ftm.s.StatFullSync : &pVM->ftm.s.StatDeltaVM);
633
634 RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */
635
636 /* Reset the sync state. */
637 pVM->ftm.s.syncstate.uOffStream = 0;
638 pVM->ftm.s.syncstate.cbReadBlock = 0;
639 pVM->ftm.s.syncstate.fStopReading = false;
640 pVM->ftm.s.syncstate.fIOError = false;
641 pVM->ftm.s.syncstate.fEndOfStream = false;
642
643 rc = ftmR3TcpSubmitCommand(pVM, (fFullSync) ? "full-sync" : "checkpoint");
644 AssertRC(rc);
645
646 pVM->ftm.s.fDeltaLoadSaveActive = (fFullSync == false);
647 rc = VMR3SaveFT(pVM, &g_ftmR3TcpOps, pVM, &fSuspended);
648 pVM->ftm.s.fDeltaLoadSaveActive = false;
649 AssertRC(rc);
650
651 rc = ftmR3TcpReadACK(pVM, (fFullSync) ? "full-sync-complete" : "checkpoint-complete");
652 AssertRC(rc);
653
654 RTSocketRelease(pVM->ftm.s.hSocket);
655 break;
656 }
657
658 case FTMSYNCSTATE_DELTA_MEMORY:
659 /* Nothing to do as we sync the memory in an async thread; no need to block EMT. */
660 STAM_REL_COUNTER_INC(&pVM->ftm.s.StatDeltaMem);
661 break;
662 }
663
664 /* Write protect all memory. */
665 rc = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)ftmR3WriteProtectMemory, 1, pVM);
666 AssertRCReturn(rc, rc);
667
668 if (enmState != FTMSYNCSTATE_DELTA_MEMORY)
669 {
670 rc = VMR3Resume(pVM);
671 AssertRCReturn(rc, rc);
672
673 /** Hack alert as EM is responsible for dealing with the suspend state. We must do this here ourselves, but only for this EMT.*/
674 if (VM_IS_EMT(pVM))
675 EMR3NotifyResume(pVM);
676 }
677 return VINF_SUCCESS;
678}
679
680/**
681 * PGMR3PhysEnumDirtyFTPages callback for syncing dirty physical pages
682 *
683 * @param pVM VM Handle.
684 * @param GCPhys GC physical address
685 * @param pRange HC virtual address of the page(s)
686 * @param cbRange Size of the dirty range in bytes.
687 * @param pvUser User argument
688 */
689static DECLCALLBACK(int) ftmR3SyncDirtyPage(PVM pVM, RTGCPHYS GCPhys, uint8_t *pRange, unsigned cbRange, void *pvUser)
690{
691 FTMTCPHDRMEM Hdr;
692 Hdr.u32Magic = FTMTCPHDR_MAGIC;
693 Hdr.GCPhys = GCPhys;
694 Hdr.cbPageRange = cbRange;
695 Hdr.cb = cbRange;
696 /** @todo compress page(s). */
697 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, &Hdr, sizeof(Hdr), pRange, (size_t)Hdr.cb);
698 if (RT_FAILURE(rc))
699 {
700 LogRel(("FTSync/TCP: Write error (ftmR3SyncDirtyPage): %Rrc (cb=%#x)\n", rc, Hdr.cb));
701 return rc;
702 }
703 pVM->ftm.s.StatSentMem.c += Hdr.cb + sizeof(Hdr);
704 return VINF_SUCCESS;
705}
706
707/**
708 * Thread function which starts syncing process for this master VM
709 *
710 * @param Thread The thread id.
711 * @param pvUser Not used
712 * @return VINF_SUCCESS (ignored).
713 *
714 */
715static DECLCALLBACK(int) ftmR3MasterThread(RTTHREAD Thread, void *pvUser)
716{
717 int rc = VINF_SUCCESS;
718 PVM pVM = (PVM)pvUser;
719
720 for (;;)
721 {
722 /*
723 * Try connect to the standby machine.
724 */
725 Log(("ftmR3MasterThread: client connect to %s %d\n", pVM->ftm.s.pszAddress, pVM->ftm.s.uPort));
726 rc = RTTcpClientConnect(pVM->ftm.s.pszAddress, pVM->ftm.s.uPort, &pVM->ftm.s.hSocket);
727 if (RT_SUCCESS(rc))
728 {
729 Log(("ftmR3MasterThread: CONNECTED\n"));
730
731 /* Disable Nagle. */
732 rc = RTTcpSetSendCoalescing(pVM->ftm.s.hSocket, false /*fEnable*/);
733 AssertRC(rc);
734
735 /* Read and check the welcome message. */
736 char szLine[RT_MAX(128, sizeof(g_szWelcome))];
737 RT_ZERO(szLine);
738 rc = RTTcpRead(pVM->ftm.s.hSocket, szLine, sizeof(g_szWelcome) - 1, NULL);
739 if ( RT_SUCCESS(rc)
740 && !strcmp(szLine, g_szWelcome))
741 {
742 /* password */
743 if (pVM->ftm.s.pszPassword)
744 rc = RTTcpWrite(pVM->ftm.s.hSocket, pVM->ftm.s.pszPassword, strlen(pVM->ftm.s.pszPassword));
745
746 if (RT_SUCCESS(rc))
747 {
748 /* ACK */
749 rc = ftmR3TcpReadACK(pVM, "password", "Invalid password");
750 if (RT_SUCCESS(rc))
751 {
752 /** todo: verify VM config. */
753 break;
754 }
755 }
756 }
757 /* Failed, so don't bother anymore. */
758 return VINF_SUCCESS;
759 }
760 rc = RTSemEventWait(pVM->ftm.s.master.hShutdownEvent, 1000 /* 1 second */);
761 if (rc != VERR_TIMEOUT)
762 return VINF_SUCCESS; /* told to quit */
763 }
764
765 /* Successfully initialized the connection to the standby node.
766 * Start the sync process.
767 */
768
769 /* First sync all memory and write protect everything so
770 * we can send changed pages later on.
771 */
772
773 rc = ftmR3PerformSync(pVM, FTMSYNCSTATE_FULL);
774
775 for (;;)
776 {
777 rc = RTSemEventWait(pVM->ftm.s.master.hShutdownEvent, pVM->ftm.s.uInterval);
778 if (rc != VERR_TIMEOUT)
779 break; /* told to quit */
780
781 if (!pVM->ftm.s.fCheckpointingActive)
782 {
783 rc = PDMCritSectEnter(&pVM->ftm.s.CritSect, VERR_SEM_BUSY);
784 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
785
786 rc = ftmR3TcpSubmitCommand(pVM, "mem-sync");
787 AssertRC(rc);
788
789 /* sync the changed memory with the standby node. */
790 rc = ftmR3PerformSync(pVM, FTMSYNCSTATE_DELTA_MEMORY);
791
792 /* Enumerate all dirty pages and send them to the standby VM. */
793 rc = PGMR3PhysEnumDirtyFTPages(pVM, ftmR3SyncDirtyPage, NULL /* pvUser */);
794 AssertRC(rc);
795
796 /* Send last memory header to signal the end. */
797 FTMTCPHDRMEM Hdr;
798 Hdr.u32Magic = FTMTCPHDR_MAGIC;
799 Hdr.GCPhys = 0;
800 Hdr.cbPageRange = 0;
801 Hdr.cb = 0;
802 rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 1, &Hdr, sizeof(Hdr));
803 if (RT_FAILURE(rc))
804 LogRel(("FTSync/TCP: Write error (ftmR3MasterThread): %Rrc (cb=%#x)\n", rc, Hdr.cb));
805
806 rc = ftmR3TcpReadACK(pVM, "mem-sync-complete");
807 AssertRC(rc);
808
809 PDMCritSectLeave(&pVM->ftm.s.CritSect);
810 }
811 }
812 return rc;
813}
814
815/**
816 * Listen for incoming traffic destined for the standby VM.
817 *
818 * @copydoc FNRTTCPSERVE
819 *
820 * @returns VINF_SUCCESS or VERR_TCP_SERVER_STOP.
821 */
822static DECLCALLBACK(int) ftmR3StandbyServeConnection(RTSOCKET Sock, void *pvUser)
823{
824 PVM pVM = (PVM)pvUser;
825
826 pVM->ftm.s.hSocket = Sock;
827
828 /*
829 * Disable Nagle.
830 */
831 int rc = RTTcpSetSendCoalescing(Sock, false /*fEnable*/);
832 AssertRC(rc);
833
834 /* Send the welcome message to the master node. */
835 rc = RTTcpWrite(Sock, g_szWelcome, sizeof(g_szWelcome) - 1);
836 if (RT_FAILURE(rc))
837 {
838 LogRel(("Teleporter: Failed to write welcome message: %Rrc\n", rc));
839 return VINF_SUCCESS;
840 }
841
842 /*
843 * Password.
844 */
845 const char *pszPassword = pVM->ftm.s.pszPassword;
846 if (pszPassword)
847 {
848 unsigned off = 0;
849 while (pszPassword[off])
850 {
851 char ch;
852 rc = RTTcpRead(Sock, &ch, sizeof(ch), NULL);
853 if ( RT_FAILURE(rc)
854 || pszPassword[off] != ch)
855 {
856 if (RT_FAILURE(rc))
857 LogRel(("FTSync: Password read failure (off=%u): %Rrc\n", off, rc));
858 else
859 LogRel(("FTSync: Invalid password (off=%u)\n", off));
860 ftmR3TcpWriteNACK(pVM, VERR_AUTHENTICATION_FAILURE);
861 return VINF_SUCCESS;
862 }
863 off++;
864 }
865 }
866 rc = ftmR3TcpWriteACK(pVM);
867 if (RT_FAILURE(rc))
868 return VINF_SUCCESS;
869
870 /** todo: verify VM config. */
871
872 /*
873 * Stop the server.
874 *
875 * Note! After this point we must return VERR_TCP_SERVER_STOP, while prior
876 * to it we must not return that value!
877 */
878 RTTcpServerShutdown(pVM->ftm.s.standby.hServer);
879
880 /*
881 * Command processing loop.
882 */
883 bool fDone = false;
884 for (;;)
885 {
886 bool fFullSync = false;
887 char szCmd[128];
888
889 rc = ftmR3TcpReadLine(pVM, szCmd, sizeof(szCmd));
890 if (RT_FAILURE(rc))
891 break;
892
893 if (!strcmp(szCmd, "mem-sync"))
894 {
895 rc = ftmR3TcpWriteACK(pVM);
896 AssertRC(rc);
897 if (RT_FAILURE(rc))
898 continue;
899
900 while (true)
901 {
902 FTMTCPHDRMEM Hdr;
903 void *pPage;
904
905 /* Read memory header. */
906 rc = RTTcpRead(pVM->ftm.s.hSocket, &Hdr, sizeof(Hdr), NULL);
907 if (RT_FAILURE(rc))
908 {
909 Log(("RTTcpRead failed with %Rrc\n", rc));
910 break;
911 }
912 pVM->ftm.s.StatReceivedMem.c += sizeof(Hdr);
913
914 if (Hdr.cb == 0)
915 break; /* end of sync. */
916
917 Assert(Hdr.cb == Hdr.cbPageRange); /** @todo uncompress */
918
919 /* Allocate memory to hold the page(s). */
920 pPage = RTMemAlloc(Hdr.cbPageRange);
921 AssertBreak(pPage);
922
923 /* Fetch the page(s). */
924 rc = RTTcpRead(pVM->ftm.s.hSocket, pPage, Hdr.cb, NULL);
925 if (RT_FAILURE(rc))
926 {
927 Log(("RTTcpRead page data (%d bytes) failed with %Rrc\n", Hdr.cb, rc));
928 break;
929 }
930 pVM->ftm.s.StatReceivedMem.c += Hdr.cb;
931
932 /* Update the guest memory of the standby VM. */
933#if 1
934 rc = PGMR3PhysWriteExternal(pVM, Hdr.GCPhys, pPage, Hdr.cbPageRange, "FTMemSync");
935#else
936 rc = PGMPhysWrite(pVM, Hdr.GCPhys, pPage, Hdr.cbPageRange);
937#endif
938 AssertRC(rc);
939
940 RTMemFree(pPage);
941 }
942
943 rc = ftmR3TcpWriteACK(pVM);
944 AssertRC(rc);
945 }
946 else
947 if ( !strcmp(szCmd, "checkpoint")
948 || !strcmp(szCmd, "full-sync")
949 || (fFullSync = true)) /* intended assignment */
950 {
951 rc = ftmR3TcpWriteACK(pVM);
952 AssertRC(rc);
953 if (RT_FAILURE(rc))
954 continue;
955
956 RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */
957
958 /* Reset the sync state. */
959 pVM->ftm.s.syncstate.uOffStream = 0;
960 pVM->ftm.s.syncstate.cbReadBlock = 0;
961 pVM->ftm.s.syncstate.fStopReading = false;
962 pVM->ftm.s.syncstate.fIOError = false;
963 pVM->ftm.s.syncstate.fEndOfStream = false;
964
965 pVM->ftm.s.fDeltaLoadSaveActive = (fFullSync == false);
966 rc = VMR3LoadFromStream(pVM, &g_ftmR3TcpOps, pVM, NULL, NULL);
967 pVM->ftm.s.fDeltaLoadSaveActive = false;
968 RTSocketRelease(pVM->ftm.s.hSocket);
969 AssertRC(rc);
970 if (RT_FAILURE(rc))
971 {
972 LogRel(("FTSync: VMR3LoadFromStream -> %Rrc\n", rc));
973 ftmR3TcpWriteNACK(pVM, rc);
974 continue;
975 }
976
977 /* The EOS might not have been read, make sure it is. */
978 pVM->ftm.s.syncstate.fStopReading = false;
979 size_t cbRead;
980 rc = ftmR3TcpOpRead(pVM, pVM->ftm.s.syncstate.uOffStream, szCmd, 1, &cbRead);
981 if (rc != VERR_EOF)
982 {
983 LogRel(("FTSync: Draining teleporterTcpOpRead -> %Rrc\n", rc));
984 ftmR3TcpWriteNACK(pVM, rc);
985 continue;
986 }
987
988 rc = ftmR3TcpWriteACK(pVM);
989 AssertRC(rc);
990 }
991 }
992 LogFlowFunc(("returns mRc=%Rrc\n", rc));
993 return VERR_TCP_SERVER_STOP;
994}
995
996/**
997 * Powers on the fault tolerant virtual machine.
998 *
999 * @returns VBox status code.
1000 *
1001 * @param pVM The VM to operate on.
1002 * @param fMaster FT master or standby
1003 * @param uInterval FT sync interval
1004 * @param pszAddress Standby VM address
1005 * @param uPort Standby VM port
1006 * @param pszPassword FT password (NULL for none)
1007 *
1008 * @thread Any thread.
1009 * @vmstate Created
1010 * @vmstateto PoweringOn+Running (master), PoweringOn+Running_FT (standby)
1011 */
1012VMMR3DECL(int) FTMR3PowerOn(PVM pVM, bool fMaster, unsigned uInterval, const char *pszAddress, unsigned uPort, const char *pszPassword)
1013{
1014 int rc = VINF_SUCCESS;
1015
1016 VMSTATE enmVMState = VMR3GetState(pVM);
1017 AssertMsgReturn(enmVMState == VMSTATE_CREATED,
1018 ("%s\n", VMR3GetStateName(enmVMState)),
1019 VERR_INTERNAL_ERROR_4);
1020 AssertReturn(pszAddress, VERR_INVALID_PARAMETER);
1021
1022 if (pVM->ftm.s.uInterval)
1023 pVM->ftm.s.uInterval = uInterval;
1024 else
1025 pVM->ftm.s.uInterval = 50; /* standard sync interval of 50ms */
1026
1027 pVM->ftm.s.uPort = uPort;
1028 pVM->ftm.s.pszAddress = RTStrDup(pszAddress);
1029 if (pszPassword)
1030 pVM->ftm.s.pszPassword = RTStrDup(pszPassword);
1031 if (fMaster)
1032 {
1033 rc = RTSemEventCreate(&pVM->ftm.s.master.hShutdownEvent);
1034 if (RT_FAILURE(rc))
1035 return rc;
1036
1037 rc = RTThreadCreate(NULL, ftmR3MasterThread, pVM,
1038 0, RTTHREADTYPE_IO /* higher than normal priority */, 0, "ftmMaster");
1039 if (RT_FAILURE(rc))
1040 return rc;
1041
1042 pVM->fFaultTolerantMaster = true;
1043 if (PGMIsUsingLargePages(pVM))
1044 {
1045 /* Must disable large page usage as 2 MB pages are too big to write monitor. */
1046 LogRel(("FTSync: disabling large page usage.\n"));
1047 PGMSetLargePageUsage(pVM, false);
1048 }
1049 /** @todo might need to disable page fusion as well */
1050
1051 return VMR3PowerOn(pVM);
1052 }
1053 else
1054 {
1055 /* standby */
1056 rc = RTTcpServerCreateEx(pszAddress, uPort, &pVM->ftm.s.standby.hServer);
1057 if (RT_FAILURE(rc))
1058 return rc;
1059 pVM->ftm.s.fIsStandbyNode = true;
1060
1061 rc = RTTcpServerListen(pVM->ftm.s.standby.hServer, ftmR3StandbyServeConnection, pVM);
1062 /** @todo deal with the exit code to check if we should activate this standby VM. */
1063
1064 if (pVM->ftm.s.standby.hServer)
1065 {
1066 RTTcpServerDestroy(pVM->ftm.s.standby.hServer);
1067 pVM->ftm.s.standby.hServer = NULL;
1068 }
1069 if (rc == VERR_TCP_SERVER_SHUTDOWN)
1070 rc = VINF_SUCCESS; /* ignore this error; the standby process was cancelled. */
1071 }
1072 return rc;
1073}
1074
1075/**
1076 * Powers off the fault tolerant virtual machine (standby).
1077 *
1078 * @returns VBox status code.
1079 *
1080 * @param pVM The VM to operate on.
1081 */
1082VMMR3DECL(int) FTMR3CancelStandby(PVM pVM)
1083{
1084 AssertReturn(!pVM->fFaultTolerantMaster, VERR_NOT_SUPPORTED);
1085 Assert(pVM->ftm.s.standby.hServer);
1086
1087 return RTTcpServerShutdown(pVM->ftm.s.standby.hServer);
1088}
1089
1090
1091/**
1092 * Performs a full sync to the standby node
1093 *
1094 * @returns VBox status code.
1095 *
1096 * @param pVM The VM to operate on.
1097 * @param enmCheckpoint Checkpoint type
1098 */
1099VMMR3DECL(int) FTMR3SetCheckpoint(PVM pVM, FTMCHECKPOINTTYPE enmCheckpoint)
1100{
1101 int rc;
1102
1103 if (!pVM->fFaultTolerantMaster)
1104 return VINF_SUCCESS;
1105
1106 switch (enmCheckpoint)
1107 {
1108 case FTMCHECKPOINTTYPE_NETWORK:
1109 STAM_REL_COUNTER_INC(&pVM->ftm.s.StatCheckpointNetwork);
1110 break;
1111
1112 case FTMCHECKPOINTTYPE_STORAGE:
1113 STAM_REL_COUNTER_INC(&pVM->ftm.s.StatCheckpointStorage);
1114 break;
1115
1116 default:
1117 break;
1118 }
1119 pVM->ftm.s.fCheckpointingActive = true;
1120 if (VM_IS_EMT(pVM))
1121 {
1122 PVMCPU pVCpu = VMMGetCpu(pVM);
1123
1124 /* We must take special care here as the memory sync is competing with us and requires a responsive EMT. */
1125 while ((rc = PDMCritSectTryEnter(&pVM->ftm.s.CritSect)) == VERR_SEM_BUSY)
1126 {
1127 if (VM_FF_ISPENDING(pVM, VM_FF_EMT_RENDEZVOUS))
1128 {
1129 rc = VMMR3EmtRendezvousFF(pVM, pVCpu);
1130 AssertRC(rc);
1131 }
1132
1133 if (VM_FF_ISPENDING(pVM, VM_FF_REQUEST))
1134 {
1135 rc = VMR3ReqProcessU(pVM->pUVM, VMCPUID_ANY);
1136 AssertRC(rc);
1137 }
1138 }
1139 }
1140 else
1141 rc = PDMCritSectEnter(&pVM->ftm.s.CritSect, VERR_SEM_BUSY);
1142
1143 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
1144
1145 /* Sync state + changed memory with the standby node. */
1146 rc = ftmR3PerformSync(pVM, FTMSYNCSTATE_DELTA_VM);
1147
1148 PDMCritSectLeave(&pVM->ftm.s.CritSect);
1149 pVM->ftm.s.fCheckpointingActive = false;
1150
1151 return rc;
1152}
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette