VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/SrvIntNetR0.cpp@ 60404

Last change on this file since 60404 was 57846, checked in by vboxsync, 9 years ago

SrvIntNetR0: intnetR0NetworkEditDhcpFromIntNet - update UDP checksum
only if present.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 239.3 KB
Line 
1/* $Id: SrvIntNetR0.cpp 57846 2015-09-21 19:50:36Z vboxsync $ */
2/** @file
3 * Internal networking - The ring 0 service.
4 *
5 * @remarks No lazy code changes. If you don't understand exactly what you're
6 * doing, get an understanding or forget it.
7 * All changes shall be reviewed by bird before commit. If not around,
8 * email and let Frank and/or Klaus OK the changes before committing.
9 */
10
11/*
12 * Copyright (C) 2006-2015 Oracle Corporation
13 *
14 * This file is part of VirtualBox Open Source Edition (OSE), as
15 * available from http://www.virtualbox.org. This file is free software;
16 * you can redistribute it and/or modify it under the terms of the GNU
17 * General Public License (GPL) as published by the Free Software
18 * Foundation, in version 2 as it comes in the "COPYING" file of the
19 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
20 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
21 */
22
23
24/*********************************************************************************************************************************
25* Header Files *
26*********************************************************************************************************************************/
27#define LOG_GROUP LOG_GROUP_SRV_INTNET
28#include <VBox/intnet.h>
29#include <VBox/intnetinline.h>
30#include <VBox/vmm/pdmnetinline.h>
31#include <VBox/sup.h>
32#include <VBox/vmm/pdm.h>
33#include <VBox/log.h>
34
35#include <iprt/asm.h>
36#include <iprt/assert.h>
37#include <iprt/handletable.h>
38#include <iprt/mp.h>
39#include <iprt/mem.h>
40#include <iprt/net.h>
41#include <iprt/semaphore.h>
42#include <iprt/spinlock.h>
43#include <iprt/string.h>
44#include <iprt/thread.h>
45#include <iprt/time.h>
46
47
48/*********************************************************************************************************************************
49* Defined Constants And Macros *
50*********************************************************************************************************************************/
51/** @def INTNET_WITH_DHCP_SNOOPING
52 * Enables DHCP snooping when in shared-mac-on-the-wire mode. */
53#define INTNET_WITH_DHCP_SNOOPING
54
55/** The maximum number of interfaces in a network. */
56#define INTNET_MAX_IFS (1023 + 1 + 16)
57
58/** The number of entries to grow the destination tables with.
 * The 16-entry variant is currently compiled out, so tables grow one entry
 * at a time. */
59#if 0
60# define INTNET_GROW_DSTTAB_SIZE 16
61#else
62# define INTNET_GROW_DSTTAB_SIZE 1
63#endif
64
65/** The wakeup bit in the INTNETIF::cBusy and INTNETTRUNKIF::cBusy counters. */
66#define INTNET_BUSY_WAKEUP_MASK RT_BIT_32(30)
67
68
69/*********************************************************************************************************************************
70* Structures and Typedefs *
71*********************************************************************************************************************************/
72/**
73 * MAC address lookup table entry.
 *
 * One entry per interface; INTNETMACTAB::paEntries is an array of these.
74 */
75typedef struct INTNETMACTABENTRY
76{
77 /** The MAC address of this entry. */
78 RTMAC MacAddr;
79 /** Whether it is effectively in promiscuous mode. */
80 bool fPromiscuousEff;
81 /** Is it promiscuous and should it see unrelated trunk traffic. */
82 bool fPromiscuousSeeTrunk;
83 /** Is it active.
84 * We ignore the entry if this is clear and may end up sending packets addressed
85 * to this interface onto the trunk. The reasoning for this is that this could
86 * be the interface of a VM that just has been teleported to a different host. */
87 bool fActive;
88 /** Pointer to the network interface. */
89 struct INTNETIF *pIf;
90} INTNETMACTABENTRY;
91/** Pointer to a MAC address lookup table entry. */
92typedef INTNETMACTABENTRY *PINTNETMACTABENTRY;
93
94/**
95 * MAC address lookup table.
96 *
97 * @todo Having this in a separate structure didn't work out as well as it
98 * should. Consider merging it into INTNETNETWORK.
99 */
100typedef struct INTNETMACTAB
101{
102 /** The current number of entries. */
103 uint32_t cEntries;
104 /** The number of entries we've allocated space for. */
105 uint32_t cEntriesAllocated;
106 /** Table entries. */
107 PINTNETMACTABENTRY paEntries;
108
109 /** The number of interface entries currently in promiscuous mode. */
110 uint32_t cPromiscuousEntries;
111 /** The number of interface entries currently in promiscuous mode that
112 * shall not see unrelated trunk traffic. */
113 uint32_t cPromiscuousNoTrunkEntries;
114
115 /** The host MAC address (reported). */
116 RTMAC HostMac;
117 /** The effective host promiscuous setting (reported). */
118 bool fHostPromiscuousEff;
119 /** The real host promiscuous setting (reported). */
120 bool fHostPromiscuousReal;
121 /** Whether the host is active. */
122 bool fHostActive;
123
124 /** Whether the wire is effectively promiscuous (config). */
125 bool fWirePromiscuousEff;
126 /** Whether the wire is promiscuous (config).
127 * (Shadows INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE in
128 * INTNETNETWORK::fFlags.) */
129 bool fWirePromiscuousReal;
130 /** Whether the wire is active. */
131 bool fWireActive;
132
133 /** Pointer to the trunk interface. */
134 struct INTNETTRUNKIF *pTrunk;
135} INTNETMACTAB;
136/** Pointer to a MAC address lookup table. */
137typedef INTNETMACTAB *PINTNETMACTAB;
138
139/**
140 * Destination table.
 *
 * Holds the trunk destination mask plus a list of destination interfaces.
 * The aIfs member is declared with a single element but is documented as
 * variable sized; cIfs gives the number of destination interfaces.
141 */
142typedef struct INTNETDSTTAB
143{
144 /** The trunk destinations. */
145 uint32_t fTrunkDst;
146 /** Pointer to the trunk interface (referenced) if fTrunkDst is non-zero. */
147 struct INTNETTRUNKIF *pTrunk;
148 /** The number of destination interfaces. */
149 uint32_t cIfs;
150 /** The interfaces (referenced). Variable sized array. */
151 struct
152 {
153 /** The destination interface. */
154 struct INTNETIF *pIf;
155 /** Whether to replace the destination MAC address.
156 * This is used when sharing MAC address with the host on the wire(less). */
157 bool fReplaceDstMac;
158 } aIfs[1];
159} INTNETDSTTAB;
160/** Pointer to a destination table. */
161typedef INTNETDSTTAB *PINTNETDSTTAB;
162/** Pointer to a const destination table. */
163typedef INTNETDSTTAB const *PCINTNETDSTTAB;
164
165/**
166 * An address paired with its type tag.
167 */
168typedef struct INTNETADDR
169{
170 /** The address type. */
171 INTNETADDRTYPE enmType;
172 /** The address. */
173 RTNETADDRU Addr;
174} INTNETADDR;
175/** Pointer to an address. */
176typedef INTNETADDR *PINTNETADDR;
177/** Pointer to a const address. */
178typedef INTNETADDR const *PCINTNETADDR;
179
180
181/**
182 * Address cache for a specific network layer.
 *
 * All counters and sizes are 8-bit, so a cache cannot describe more than
 * 255 entries or addresses/entries larger than 255 bytes.
183 */
184typedef struct INTNETADDRCACHE
185{
186 /** Pointer to the table of addresses. */
187 uint8_t *pbEntries;
188 /** The number of valid address entries. */
189 uint8_t cEntries;
190 /** The number of allocated address entries. */
191 uint8_t cEntriesAlloc;
192 /** The address size. */
193 uint8_t cbAddress;
194 /** The size of an entry. */
195 uint8_t cbEntry;
196} INTNETADDRCACHE;
197/** Pointer to an address cache. */
198typedef INTNETADDRCACHE *PINTNETADDRCACHE;
199/** Pointer to a const address cache. */
200typedef INTNETADDRCACHE const *PCINTNETADDRCACHE;
201
202
203/**
204 * A network interface.
205 *
206 * Unless explicitly stated, all members are protected by the network semaphore.
207 */
208typedef struct INTNETIF
209{
210 /** The MAC address.
211 * This is shadowed by INTNETMACTABENTRY::MacAddr. */
212 RTMAC MacAddr;
213 /** Set if the INTNETIF::MacAddr member has been explicitly set. */
214 bool fMacSet;
215 /** Tracks the desired promiscuous setting of the interface. */
216 bool fPromiscuousReal;
217 /** Whether the interface is active or not.
218 * This is shadowed by INTNETMACTABENTRY::fActive. */
219 bool fActive;
220 /** Whether someone is currently in the destructor or has indicated that
221 * the end is nigh by means of IntNetR0IfAbortWait. */
222 bool volatile fDestroying;
223 /** The flags specified when opening this interface. */
224 uint32_t fOpenFlags;
225 /** Number of yields done to try to make the interface read pending data.
226 * We will stop yielding when this reaches a threshold assuming that the VM is
227 * paused or that it simply isn't worth all the delay. It is cleared when a
228 * successful send has been done. */
229 uint32_t cYields;
230 /** Pointer to the current exchange buffer (ring-0). */
231 PINTNETBUF pIntBuf;
232 /** Pointer to ring-3 mapping of the current exchange buffer. */
233 R3PTRTYPE(PINTNETBUF) pIntBufR3;
234 /** Pointer to the default exchange buffer for the interface. */
235 PINTNETBUF pIntBufDefault;
236 /** Pointer to ring-3 mapping of the default exchange buffer. */
237 R3PTRTYPE(PINTNETBUF) pIntBufDefaultR3;
238 /** Event semaphore which a receiver/consumer thread will sleep on while
239 * waiting for data to arrive. */
240 RTSEMEVENT volatile hRecvEvent;
241 /** Number of threads sleeping on the event semaphore. */
242 uint32_t cSleepers;
243 /** The interface handle.
244 * When this is INTNET_HANDLE_INVALID a sleeper which is waking up
245 * should return with the appropriate error condition. */
246 INTNETIFHANDLE volatile hIf;
247 /** Pointer to the network this interface is connected to.
248 * This is protected by the INTNET::hMtxCreateOpenDestroy. */
249 struct INTNETNETWORK *pNetwork;
250 /** The session this interface is associated with. */
251 PSUPDRVSESSION pSession;
252 /** The SUPR0 object id. */
253 void *pvObj;
254 /** The network layer address cache. (Indexed by type, 0 entry isn't used.)
255 * This is protected by the address spinlock of the network. */
256 INTNETADDRCACHE aAddrCache[kIntNetAddrType_End];
257 /** Spinlock protecting the input (producer) side of the receive ring. */
258 RTSPINLOCK hRecvInSpinlock;
259 /** Busy count for tracking destination table references and active sends.
260 * Usually incremented while owning the switch table spinlock. The 30th bit
261 * (INTNET_BUSY_WAKEUP_MASK) is used to indicate wakeup. */
262 uint32_t volatile cBusy;
263 /** The preallocated destination table.
264 * This is NULL when it's in use as a precaution against unserialized
265 * transmitting. This is grown when new interfaces are added to the network. */
266 PINTNETDSTTAB volatile pDstTab;
267 /** Pointer to the trunk's per interface data. Can be NULL. */
268 void *pvIfData;
269 /** Header buffer for when we're carving GSO frames. */
270 uint8_t abGsoHdrs[256];
271} INTNETIF;
272/** Pointer to an internal network interface. */
273typedef INTNETIF *PINTNETIF;
274
275
276/**
277 * A trunk interface.
278 */
279typedef struct INTNETTRUNKIF
280{
281 /** The port interface we present to the component.
 * Must stay the first member: INTNET_SWITCHPORT_2_TRUNKIF converts a pointer
 * to this member into a PINTNETTRUNKIF with a plain cast. */
282 INTNETTRUNKSWPORT SwitchPort;
283 /** The port interface we get from the component. */
284 PINTNETTRUNKIFPORT pIfPort;
285 /** Pointer to the network we're connected to.
286 * This may be NULL if we're orphaned? */
287 struct INTNETNETWORK *pNetwork;
288 /** The current MAC address for the interface. (reported)
289 * Updated while owning the switch table spinlock. */
290 RTMAC MacAddr;
291 /** Whether to supply physical addresses with the outbound SGs. (reported) */
292 bool fPhysSG;
293 /** Explicit alignment. */
294 bool fUnused;
295 /** Busy count for tracking destination table references and active sends.
296 * Usually incremented while owning the switch table spinlock. The 30th bit
297 * (INTNET_BUSY_WAKEUP_MASK) is used to indicate wakeup. */
298 uint32_t volatile cBusy;
299 /** Mask of destinations that pfnXmit can cope with disabled preemption for. */
300 uint32_t fNoPreemptDsts;
301 /** The GSO capabilities of the wire destination. (reported) */
302 uint32_t fWireGsoCapabilites;
303 /** The GSO capabilities of the host destination. (reported)
304 * This is a bit map where each bit represents the GSO type with the same
305 * number. */
306 uint32_t fHostGsoCapabilites;
307 /** The destination table spinlock, interrupt safe.
308 * Protects apTaskDstTabs and apIntDstTabs. */
309 RTSPINLOCK hDstTabSpinlock;
310 /** The number of entries in apIntDstTabs. */
311 uint32_t cIntDstTabs;
312 /** The task time destination tables.
313 * @remarks intnetR0NetworkEnsureTabSpace and others ASSUMES this immediately
314 * precedes apIntDstTabs so that these two tables can be used as one
315 * contiguous one. */
316 PINTNETDSTTAB apTaskDstTabs[2];
317 /** The interrupt / disabled-preemption time destination tables.
318 * This is a variable sized array. */
319 PINTNETDSTTAB apIntDstTabs[1];
320} INTNETTRUNKIF;
321/** Pointer to a trunk interface. */
322typedef INTNETTRUNKIF *PINTNETTRUNKIF;
323
324/** Converts a pointer to INTNETTRUNKIF::SwitchPort to a PINTNETTRUNKIF.
 * This is a plain cast, valid because SwitchPort is the first member. */
325#define INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort) ((PINTNETTRUNKIF)(pSwitchPort))
326
327
328/**
329 * Internal representation of a network.
330 */
331typedef struct INTNETNETWORK
332{
333 /** The next network in the chain.
334 * This is protected by the INTNET::hMtxCreateOpenDestroy. */
335 struct INTNETNETWORK *pNext;
336
337 /** The spinlock protecting MacTab, aAddrBlacklist and INTNETIF::aAddrCache.
338 * Interrupt safe. */
339 RTSPINLOCK hAddrSpinlock;
340 /** MAC address table.
341 * This doubles as interface collection. */
342 INTNETMACTAB MacTab;
343
344 /** The network layer address cache. (Indexed by type, 0 entry isn't used.)
345 * Contains host addresses. We don't let guests spoof them. */
346 INTNETADDRCACHE aAddrBlacklist[kIntNetAddrType_End];
347
348 /** Wait for an interface to stop being busy so it can be removed or have its
349 * destination table replaced. We have to wait upon this while owning the
350 * network mutex. Will only ever have one waiter because of the big mutex. */
351 RTSEMEVENT hEvtBusyIf;
352 /** Pointer to the instance data. */
353 struct INTNET *pIntNet;
354 /** The SUPR0 object id. */
355 void *pvObj;
356 /** Pointer to the temporary buffer that is used when snooping fragmented packets.
357 * This is allocated after this structure if we're sharing the MAC address with
358 * the host. The buffer is INTNETNETWORK_TMP_SIZE big and aligned on a 64-byte boundary. */
359 uint8_t *pbTmp;
360 /** Network creation flags (INTNET_OPEN_FLAGS_*). */
361 uint32_t fFlags;
362 /** Any restrictive policies required as a minimum by some interface.
363 * (INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES) */
364 uint32_t fMinFlags;
365 /** The number of active interfaces (excluding the trunk). */
366 uint32_t cActiveIFs;
367 /** The length of the network name. */
368 uint8_t cchName;
369 /** The network name. */
370 char szName[INTNET_MAX_NETWORK_NAME];
371 /** The trunk type. */
372 INTNETTRUNKTYPE enmTrunkType;
373 /** The trunk name. */
374 char szTrunk[INTNET_MAX_TRUNK_NAME];
375} INTNETNETWORK;
376/** Pointer to an internal network. */
377typedef INTNETNETWORK *PINTNETNETWORK;
378/** Pointer to a const internal network. */
379typedef const INTNETNETWORK *PCINTNETNETWORK;
380
381/** The size in bytes of the buffer INTNETNETWORK::pbTmp points at. */
382#define INTNETNETWORK_TMP_SIZE 2048
383
384
385/**
386 * Internal networking instance.
387 */
388typedef struct INTNET
389{
390 /** Magic number (INTNET_MAGIC). */
391 uint32_t volatile u32Magic;
392 /** Mutex protecting the creation, opening and destruction of both networks and
393 * interfaces. (This means all operations affecting the pNetworks list.) */
394 RTSEMMUTEX hMtxCreateOpenDestroy;
395 /** List of networks (chained via INTNETNETWORK::pNext).
 * Protected by INTNET::hMtxCreateOpenDestroy. */
396 PINTNETNETWORK volatile pNetworks;
397 /** Handle table for the interfaces. */
398 RTHANDLETABLE hHtIfs;
399} INTNET;
400/** Pointer to an internal network ring-0 instance. */
401typedef struct INTNET *PINTNET;
402
403/** Magic number for the internal network instance data (Hayao Miyazaki). */
404#define INTNET_MAGIC UINT32_C(0x19410105)
405
406
407/*********************************************************************************************************************************
408* Global Variables *
409*********************************************************************************************************************************/
410/** Pointer to the internal network instance data. */
411static PINTNET volatile g_pIntNet = NULL;
412
/**
 * Descriptor for one open-network policy: the restrictive flag, its relaxed
 * counterpart, the flag that marks the setting as fixed, and the mask of the
 * restrictive/relaxed pair.  Two tables of these follow, one for network-wide
 * policies and one for per-interface policies.
 */
413static const struct INTNETOPENNETWORKFLAGS
414{
415 uint32_t fRestrictive; /**< The restrictive flag (deny/disabled). */
416 uint32_t fRelaxed; /**< The relaxed flag (allow/enabled). */
417 uint32_t fFixed; /**< The config-fixed flag. */
418 uint32_t fPair; /**< The pair of restrictive and relaxed flags (fRestrictive | fRelaxed). */
419}
420/** Open network policy flags relating to the network. */
421g_afIntNetOpenNetworkNetFlags[] =
422{
423 { INTNET_OPEN_FLAGS_ACCESS_RESTRICTED, INTNET_OPEN_FLAGS_ACCESS_PUBLIC, INTNET_OPEN_FLAGS_ACCESS_FIXED, INTNET_OPEN_FLAGS_ACCESS_RESTRICTED | INTNET_OPEN_FLAGS_ACCESS_PUBLIC },
424 { INTNET_OPEN_FLAGS_PROMISC_DENY_CLIENTS, INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_CLIENTS | INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS },
425 { INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_HOST, INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_HOST | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST },
426 { INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_WIRE, INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_WIRE | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE },
427 { INTNET_OPEN_FLAGS_TRUNK_HOST_DISABLED, INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_HOST_DISABLED | INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED },
428 { INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE, INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE | INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE },
429 { INTNET_OPEN_FLAGS_TRUNK_WIRE_DISABLED, INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_WIRE_DISABLED | INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED },
430 { INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE, INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE | INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE },
431},
432/** Open network policy flags relating to the new interface. */
433g_afIntNetOpenNetworkIfFlags[] =
434{
435 { INTNET_OPEN_FLAGS_IF_PROMISC_DENY, INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW, INTNET_OPEN_FLAGS_IF_FIXED, INTNET_OPEN_FLAGS_IF_PROMISC_DENY | INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW },
436 { INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK, INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK, INTNET_OPEN_FLAGS_IF_FIXED, INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK },
437};
438
439
440/*********************************************************************************************************************************
441* Forward Declarations *
442*********************************************************************************************************************************/
443static void intnetR0TrunkIfDestroy(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork);
444
445
446/**
447 * Checks if a pointer belongs to the list of known networks without
448 * accessing memory it points to.
449 *
450 * @returns true, if such network is in the list.
451 * @param pIntNet The pointer to the internal network instance (global).
452 * @param pNetwork The pointer that must be validated.
453 */
454DECLINLINE(bool) intnetR0NetworkIsValid(PINTNET pIntNet, PINTNETNETWORK pNetwork)
455{
456 for (PINTNETNETWORK pCurr = pIntNet->pNetworks; pCurr; pCurr = pCurr->pNext)
457 if (pCurr == pNetwork)
458 return true;
459 return false;
460}
461
462
463/**
464 * Worker for intnetR0SgWritePart that deals with the case where the
465 * request doesn't fit into the first segment.
466 *
467 * @returns true, unless the request or SG invalid.
468 * @param pSG The SG list to write to.
469 * @param off Where to start writing (offset into the SG).
470 * @param cb How much to write.
471 * @param pvBuf The buffer to containing the bits to write.
472 */
473static bool intnetR0SgWritePartSlow(PCINTNETSG pSG, uint32_t off, uint32_t cb, void const *pvBuf)
474{
475 if (RT_UNLIKELY(off + cb > pSG->cbTotal))
476 return false;
477
478 /*
479 * Skip ahead to the segment where off starts.
480 */
481 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
482 unsigned iSeg = 0;
483 while (off > pSG->aSegs[iSeg].cb)
484 {
485 off -= pSG->aSegs[iSeg++].cb;
486 AssertReturn(iSeg < cSegs, false);
487 }
488
489 /*
490 * Copy the data, hoping that it's all from one segment...
491 */
492 uint32_t cbCanCopy = pSG->aSegs[iSeg].cb - off;
493 if (cbCanCopy >= cb)
494 memcpy((uint8_t *)pSG->aSegs[iSeg].pv + off, pvBuf, cb);
495 else
496 {
497 /* copy the portion in the current segment. */
498 memcpy((uint8_t *)pSG->aSegs[iSeg].pv + off, pvBuf, cbCanCopy);
499 cb -= cbCanCopy;
500
501 /* copy the portions in the other segments. */
502 do
503 {
504 pvBuf = (uint8_t const *)pvBuf + cbCanCopy;
505 iSeg++;
506 AssertReturn(iSeg < cSegs, false);
507
508 cbCanCopy = RT_MIN(cb, pSG->aSegs[iSeg].cb);
509 memcpy(pSG->aSegs[iSeg].pv, pvBuf, cbCanCopy);
510
511 cb -= cbCanCopy;
512 } while (cb > 0);
513 }
514
515 return true;
516}
517
518
519/**
520 * Writes to a part of an SG.
521 *
522 * @returns true on success, false on failure (out of bounds).
523 * @param pSG The SG list to write to.
524 * @param off Where to start writing (offset into the SG).
525 * @param cb How much to write.
526 * @param pvBuf The buffer to containing the bits to write.
527 */
528DECLINLINE(bool) intnetR0SgWritePart(PCINTNETSG pSG, uint32_t off, uint32_t cb, void const *pvBuf)
529{
530 Assert(off + cb > off);
531
532 /* The optimized case. */
533 if (RT_LIKELY( pSG->cSegsUsed == 1
534 || pSG->aSegs[0].cb >= off + cb))
535 {
536 Assert(pSG->cbTotal == pSG->aSegs[0].cb);
537 memcpy((uint8_t *)pSG->aSegs[0].pv + off, pvBuf, cb);
538 return true;
539 }
540 return intnetR0SgWritePartSlow(pSG, off, cb, pvBuf);
541}
542
543
544/**
545 * Reads a byte from a SG list.
546 *
547 * @returns The byte on success. 0xff on failure.
548 * @param pSG The SG list to read.
549 * @param off The offset (into the SG) off the byte.
550 */
551DECLINLINE(uint8_t) intnetR0SgReadByte(PCINTNETSG pSG, uint32_t off)
552{
553 if (RT_LIKELY(pSG->aSegs[0].cb > off))
554 return ((uint8_t const *)pSG->aSegs[0].pv)[off];
555
556 off -= pSG->aSegs[0].cb;
557 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
558 for (unsigned iSeg = 1; iSeg < cSegs; iSeg++)
559 {
560 if (pSG->aSegs[iSeg].cb > off)
561 return ((uint8_t const *)pSG->aSegs[iSeg].pv)[off];
562 off -= pSG->aSegs[iSeg].cb;
563 }
564 return false;
565}
566
567
568/**
569 * Worker for intnetR0SgReadPart that deals with the case where the
570 * requested data isn't in the first segment.
571 *
572 * @returns true, unless the SG is invalid.
573 * @param pSG The SG list to read.
574 * @param off Where to start reading (offset into the SG).
575 * @param cb How much to read.
576 * @param pvBuf The buffer to read into.
577 */
578static bool intnetR0SgReadPartSlow(PCINTNETSG pSG, uint32_t off, uint32_t cb, void *pvBuf)
579{
580 if (RT_UNLIKELY(off + cb > pSG->cbTotal))
581 return false;
582
583 /*
584 * Skip ahead to the segment where off starts.
585 */
586 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
587 unsigned iSeg = 0;
588 while (off > pSG->aSegs[iSeg].cb)
589 {
590 off -= pSG->aSegs[iSeg++].cb;
591 AssertReturn(iSeg < cSegs, false);
592 }
593
594 /*
595 * Copy the data, hoping that it's all from one segment...
596 */
597 uint32_t cbCanCopy = pSG->aSegs[iSeg].cb - off;
598 if (cbCanCopy >= cb)
599 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv + off, cb);
600 else
601 {
602 /* copy the portion in the current segment. */
603 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv + off, cbCanCopy);
604 cb -= cbCanCopy;
605
606 /* copy the portions in the other segments. */
607 do
608 {
609 pvBuf = (uint8_t *)pvBuf + cbCanCopy;
610 iSeg++;
611 AssertReturn(iSeg < cSegs, false);
612
613 cbCanCopy = RT_MIN(cb, pSG->aSegs[iSeg].cb);
614 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv, cbCanCopy);
615
616 cb -= cbCanCopy;
617 } while (cb > 0);
618 }
619
620 return true;
621}
622
623
624/**
625 * Reads a part of an SG into a buffer.
626 *
627 * @returns true on success, false on failure (out of bounds).
628 * @param pSG The SG list to read.
629 * @param off Where to start reading (offset into the SG).
630 * @param cb How much to read.
631 * @param pvBuf The buffer to read into.
632 */
633DECLINLINE(bool) intnetR0SgReadPart(PCINTNETSG pSG, uint32_t off, uint32_t cb, void *pvBuf)
634{
635 Assert(off + cb > off);
636
637 /* The optimized case. */
638 if (RT_LIKELY( pSG->cSegsUsed == 1
639 || pSG->aSegs[0].cb >= off + cb))
640 {
641 Assert(pSG->cbTotal == pSG->aSegs[0].cb);
642 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[0].pv + off, cb);
643 return true;
644 }
645 return intnetR0SgReadPartSlow(pSG, off, cb, pvBuf);
646}
647
648
649/**
650 * Wait for a busy counter to reach zero.
651 *
 * Returns at once if the counter already reads zero.  Otherwise the wakeup
 * bit (INTNET_BUSY_WAKEUP_MASK) is set in the counter with a compare-exchange
 * loop, and the function then waits on INTNETNETWORK::hEvtBusyIf until the
 * counter consists of the wakeup bit alone, at which point the counter is
 * reset to zero.  intnetR0BusyDec signals that semaphore when its decrement
 * leaves exactly the wakeup bit behind.
 *
652 * @param pNetwork The network.
653 * @param pcBusy The busy counter.
654 */
655static void intnetR0BusyWait(PINTNETNETWORK pNetwork, uint32_t volatile *pcBusy)
656{
657 if (ASMAtomicReadU32(pcBusy) == 0)
658 return;
659
660 /*
661 * We have to be a bit cautious here so we don't destroy the network or the
662 * semaphore before intnetR0BusyDec has signalled us.
663 */
664
665 /* Reset the semaphore and flip the wakeup bit. */
666 RTSemEventWait(pNetwork->hEvtBusyIf, 0); /* clear it */
667 uint32_t cCurBusy = ASMAtomicReadU32(pcBusy);
668 do
669 {
 /* The count may have dropped to zero since the first check; nothing to wait for then. */
670 if (cCurBusy == 0)
671 return;
672 AssertMsg(!(cCurBusy & INTNET_BUSY_WAKEUP_MASK), ("%#x\n", cCurBusy));
673 AssertMsg((cCurBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cCurBusy));
674 } while (!ASMAtomicCmpXchgExU32(pcBusy, cCurBusy | INTNET_BUSY_WAKEUP_MASK, cCurBusy, &cCurBusy));
675
676 /* Wait for the count to reach zero. */
677 do
678 {
 /* The wait status is deliberately ignored; the counter is re-read and
    re-checked after every wait.  (30000 is presumably a timeout in
    milliseconds - confirm against RTSemEventWait.) */
679 int rc2 = RTSemEventWait(pNetwork->hEvtBusyIf, 30000); NOREF(rc2);
680 //AssertMsg(RT_SUCCESS(rc2), ("rc=%Rrc *pcBusy=%#x (%#x)\n", rc2, ASMAtomicReadU32(pcBusy), cCurBusy ));
681 cCurBusy = ASMAtomicReadU32(pcBusy);
682 AssertMsg((cCurBusy & INTNET_BUSY_WAKEUP_MASK), ("%#x\n", cCurBusy));
683 AssertMsg((cCurBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cCurBusy));
 /* Done only once nothing but the wakeup bit is left AND we managed to swap it for zero. */
684 } while ( cCurBusy != INTNET_BUSY_WAKEUP_MASK
685 || !ASMAtomicCmpXchgU32(pcBusy, 0, INTNET_BUSY_WAKEUP_MASK));
686}
687
688
689/**
690 * Decrements the busy counter and maybe wakes up any threads waiting for it to
691 * reach zero.
692 *
693 * @param pNetwork The network.
694 * @param pcBusy The busy counter.
695 */
696DECLINLINE(void) intnetR0BusyDec(PINTNETNETWORK pNetwork, uint32_t volatile *pcBusy)
697{
698 uint32_t cNewBusy = ASMAtomicDecU32(pcBusy);
699 if (RT_UNLIKELY( cNewBusy == INTNET_BUSY_WAKEUP_MASK
700 && pNetwork))
701 RTSemEventSignal(pNetwork->hEvtBusyIf);
702 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
703}
704
705
706/**
707 * Increments the busy count of the specified interface.
708 *
709 * The caller must own the MAC address table spinlock.
710 *
711 * @param pIf The interface.
712 */
713DECLINLINE(void) intnetR0BusyDecIf(PINTNETIF pIf)
714{
715 intnetR0BusyDec(pIf->pNetwork, &pIf->cBusy);
716}
717
718
719/**
720 * Increments the busy count of the specified interface.
721 *
722 * The caller must own the MAC address table spinlock or an explicity reference.
723 *
724 * @param pTrunk The trunk.
725 */
726DECLINLINE(void) intnetR0BusyDecTrunk(PINTNETTRUNKIF pTrunk)
727{
728 if (pTrunk)
729 intnetR0BusyDec(pTrunk->pNetwork, &pTrunk->cBusy);
730}
731
732
733/**
734 * Increments the busy count of the specified interface.
735 *
736 * The caller must own the MAC address table spinlock or an explicity reference.
737 *
738 * @param pIf The interface.
739 */
740DECLINLINE(void) intnetR0BusyIncIf(PINTNETIF pIf)
741{
742 uint32_t cNewBusy = ASMAtomicIncU32(&pIf->cBusy);
743 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
744 NOREF(cNewBusy);
745}
746
747
748/**
749 * Increments the busy count of the specified interface.
750 *
751 * The caller must own the MAC address table spinlock or an explicity reference.
752 *
753 * @param pTrunk The trunk.
754 */
755DECLINLINE(void) intnetR0BusyIncTrunk(PINTNETTRUNKIF pTrunk)
756{
757 if (!pTrunk) return;
758 uint32_t cNewBusy = ASMAtomicIncU32(&pTrunk->cBusy);
759 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
760 NOREF(cNewBusy);
761}
762
763
764/**
765 * Retain an interface.
766 *
767 * @returns VBox status code, can assume success in most situations.
768 * @param pIf The interface instance.
769 * @param pSession The current session.
770 */
771DECLINLINE(int) intnetR0IfRetain(PINTNETIF pIf, PSUPDRVSESSION pSession)
772{
773 int rc = SUPR0ObjAddRefEx(pIf->pvObj, pSession, true /* fNoBlocking */);
774 AssertRCReturn(rc, rc);
775 return VINF_SUCCESS;
776}
777
778
779/**
780 * Release an interface previously retained by intnetR0IfRetain or
781 * by handle lookup/freeing.
782 *
783 * @returns true if destroyed, false if not.
784 * @param pIf The interface instance.
785 * @param pSession The current session.
786 */
787DECLINLINE(bool) intnetR0IfRelease(PINTNETIF pIf, PSUPDRVSESSION pSession)
788{
789 int rc = SUPR0ObjRelease(pIf->pvObj, pSession);
790 AssertRC(rc);
791 return rc == VINF_OBJECT_DESTROYED;
792}
793
794
795/**
796 * RTHandleCreateEx callback that retains an object in the
797 * handle table before returning it.
798 *
799 * (Avoids racing the freeing of the handle.)
800 *
801 * @returns VBox status code.
802 * @param hHandleTable The handle table (ignored).
803 * @param pvObj The object (INTNETIF).
804 * @param pvCtx The context (SUPDRVSESSION).
805 * @param pvUser The user context (ignored).
806 */
807static DECLCALLBACK(int) intnetR0IfRetainHandle(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
808{
809 NOREF(pvUser);
810 NOREF(hHandleTable);
811 PINTNETIF pIf = (PINTNETIF)pvObj;
812 if (pIf->hIf != INTNET_HANDLE_INVALID) /* Don't try retain it if called from intnetR0IfDestruct. */
813 return intnetR0IfRetain(pIf, (PSUPDRVSESSION)pvCtx);
814 return VINF_SUCCESS;
815}
816
817
818
819/**
820 * Checks if the interface has a usable MAC address or not.
821 *
822 * @returns true if MacAddr is usable, false if not.
823 * @param pIf The interface.
824 */
825DECL_FORCE_INLINE(bool) intnetR0IfHasMacAddr(PINTNETIF pIf)
826{
827 return pIf->fMacSet || !(pIf->MacAddr.au8[0] & 1);
828}
829
830
831/**
832 * Locates the MAC address table entry for the given interface.
833 *
834 * The caller holds the MAC address table spinlock, obviously.
835 *
836 * @returns Pointer to the entry on if found, NULL if not.
837 * @param pNetwork The network.
838 * @param pIf The interface.
839 */
840DECLINLINE(PINTNETMACTABENTRY) intnetR0NetworkFindMacAddrEntry(PINTNETNETWORK pNetwork, PINTNETIF pIf)
841{
842 uint32_t iIf = pNetwork->MacTab.cEntries;
843 while (iIf-- > 0)
844 {
845 if (pNetwork->MacTab.paEntries[iIf].pIf == pIf)
846 return &pNetwork->MacTab.paEntries[iIf];
847 }
848 return NULL;
849}
850
851
852/**
853 * Checks if the IPv6 address is a good interface address.
854 * @returns true/false.
855 * @param addr The address, network endian.
856 */
857DECLINLINE(bool) intnetR0IPv6AddrIsGood(RTNETADDRIPV6 addr)
858{
859 return !( ( addr.QWords.qw0 == 0 && addr.QWords.qw1 == 0) /* :: */
860 || ( (addr.Words.w0 & RT_H2BE_U16(0xff00)) == RT_H2BE_U16(0xff00)) /* multicast */
861 || ( addr.Words.w0 == 0 && addr.Words.w1 == 0
862 && addr.Words.w2 == 0 && addr.Words.w3 == 0
863 && addr.Words.w4 == 0 && addr.Words.w5 == 0
864 && addr.Words.w6 == 0 && addr.Words.w7 == RT_H2BE_U16(0x0001))); /* ::1 */
865}
866
867
868/**
869 * Checks if the IPv4 address is a broadcast address.
870 * @returns true/false.
871 * @param Addr The address, network endian.
872 */
873DECLINLINE(bool) intnetR0IPv4AddrIsBroadcast(RTNETADDRIPV4 Addr)
874{
875 /* Just check for 255.255.255.255 atm. */
876 return Addr.u == UINT32_MAX;
877}
878
879
880/**
881 * Checks if the IPv4 address is a good interface address.
882 * @returns true/false.
883 * @param Addr The address, network endian.
884 */
885DECLINLINE(bool) intnetR0IPv4AddrIsGood(RTNETADDRIPV4 Addr)
886{
887 /* Usual suspects. */
888 if ( Addr.u == UINT32_MAX /* 255.255.255.255 - broadcast. */
889 || Addr.au8[0] == 0) /* Current network, can be used as source address. */
890 return false;
891
892 /* Unusual suspects. */
893 if (RT_UNLIKELY( Addr.au8[0] == 127 /* Loopback */
894 || (Addr.au8[0] & 0xf0) == 224 /* Multicast */
895 ))
896 return false;
897 return true;
898}
899
900
901/**
902 * Gets the address size of a network layer type.
903 *
904 * @returns size in bytes.
905 * @param enmType The type.
906 */
907DECLINLINE(uint8_t) intnetR0AddrSize(INTNETADDRTYPE enmType)
908{
909 switch (enmType)
910 {
911 case kIntNetAddrType_IPv4: return 4;
912 case kIntNetAddrType_IPv6: return 16;
913 case kIntNetAddrType_IPX: return 4 + 6;
914 default: AssertFailedReturn(0);
915 }
916}
917
918
919/**
920 * Compares two address to see if they are equal, assuming naturally align structures.
921 *
922 * @returns true if equal, false if not.
923 * @param pAddr1 The first address.
924 * @param pAddr2 The second address.
925 * @param cbAddr The address size.
926 */
927DECLINLINE(bool) intnetR0AddrUIsEqualEx(PCRTNETADDRU pAddr1, PCRTNETADDRU pAddr2, uint8_t const cbAddr)
928{
929 switch (cbAddr)
930 {
931 case 4: /* IPv4 */
932 return pAddr1->au32[0] == pAddr2->au32[0];
933 case 16: /* IPv6 */
934 return pAddr1->au64[0] == pAddr2->au64[0]
935 && pAddr1->au64[1] == pAddr2->au64[1];
936 case 10: /* IPX */
937 return pAddr1->au64[0] == pAddr2->au64[0]
938 && pAddr1->au16[4] == pAddr2->au16[4];
939 default:
940 AssertFailedReturn(false);
941 }
942}
943
944
945/**
946 * Worker for intnetR0IfAddrCacheLookup that performs the lookup
947 * in the remaining cache entries after the caller has check the
948 * most likely ones.
949 *
950 * @returns -1 if not found, the index of the cache entry if found.
951 * @param pCache The cache.
952 * @param pAddr The address.
953 * @param cbAddr The address size (optimization).
954 */
955static int intnetR0IfAddrCacheLookupSlow(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
956{
957 unsigned i = pCache->cEntries - 2;
958 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
959 while (i >= 1)
960 {
961 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
962 return i;
963 pbEntry -= pCache->cbEntry;
964 i--;
965 }
966
967 return -1;
968}
969
970/**
971 * Lookup an address in a cache without any expectations.
972 *
973 * @returns -1 if not found, the index of the cache entry if found.
974 * @param pCache The cache.
975 * @param pAddr The address.
976 * @param cbAddr The address size (optimization).
977 */
978DECLINLINE(int) intnetR0IfAddrCacheLookup(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
979{
980 Assert(pCache->cbAddress == cbAddr);
981
982 /*
983 * The optimized case is when there is one cache entry and
984 * it doesn't match.
985 */
986 unsigned i = pCache->cEntries;
987 if ( i > 0
988 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr))
989 return 0;
990 if (i <= 1)
991 return -1;
992
993 /*
994 * Check the last entry.
995 */
996 i--;
997 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr))
998 return i;
999 if (i <= 1)
1000 return -1;
1001
1002 return intnetR0IfAddrCacheLookupSlow(pCache, pAddr, cbAddr);
1003}
1004
1005
1006/** Same as intnetR0IfAddrCacheLookup except we expect the address to be present already. */
1007DECLINLINE(int) intnetR0IfAddrCacheLookupLikely(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1008{
1009 /** @todo implement this. */
1010 return intnetR0IfAddrCacheLookup(pCache, pAddr, cbAddr);
1011}
1012
1013
1014/**
1015 * Worker for intnetR0IfAddrCacheLookupUnlikely that performs
1016 * the lookup in the remaining cache entries after the caller
1017 * has check the most likely ones.
1018 *
1019 * The routine is expecting not to find the address.
1020 *
1021 * @returns -1 if not found, the index of the cache entry if found.
1022 * @param pCache The cache.
1023 * @param pAddr The address.
1024 * @param cbAddr The address size (optimization).
1025 */
1026static int intnetR0IfAddrCacheInCacheUnlikelySlow(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1027{
1028 /*
1029 * Perform a full table lookup.
1030 */
1031 unsigned i = pCache->cEntries - 2;
1032 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1033 while (i >= 1)
1034 {
1035 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1036 return i;
1037 pbEntry -= pCache->cbEntry;
1038 i--;
1039 }
1040
1041 return -1;
1042}
1043
1044
1045/**
1046 * Lookup an address in a cache expecting not to find it.
1047 *
1048 * @returns -1 if not found, the index of the cache entry if found.
1049 * @param pCache The cache.
1050 * @param pAddr The address.
1051 * @param cbAddr The address size (optimization).
1052 */
1053DECLINLINE(int) intnetR0IfAddrCacheLookupUnlikely(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1054{
1055 Assert(pCache->cbAddress == cbAddr);
1056
1057 /*
1058 * The optimized case is when there is one cache entry and
1059 * it doesn't match.
1060 */
1061 unsigned i = pCache->cEntries;
1062 if (RT_UNLIKELY( i > 0
1063 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr)))
1064 return 0;
1065 if (RT_LIKELY(i <= 1))
1066 return -1;
1067
1068 /*
1069 * Then check the last entry and return if there are just two cache entries.
1070 */
1071 i--;
1072 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr)))
1073 return i;
1074 if (i <= 1)
1075 return -1;
1076
1077 return intnetR0IfAddrCacheInCacheUnlikelySlow(pCache, pAddr, cbAddr);
1078}
1079
1080
1081/**
1082 * Deletes a specific cache entry.
1083 *
1084 * Worker for intnetR0NetworkAddrCacheDelete and intnetR0NetworkAddrCacheDeleteMinusIf.
1085 *
1086 * @param pIf The interface (for logging).
1087 * @param pCache The cache.
1088 * @param iEntry The entry to delete.
1089 * @param pszMsg Log message.
1090 */
1091static void intnetR0IfAddrCacheDeleteIt(PINTNETIF pIf, PINTNETADDRCACHE pCache, int iEntry, const char *pszMsg)
1092{
1093 AssertReturnVoid(iEntry < pCache->cEntries);
1094 AssertReturnVoid(iEntry >= 0);
1095#ifdef LOG_ENABLED
1096 INTNETADDRTYPE enmAddrType = (INTNETADDRTYPE)(uintptr_t)(pCache - &pIf->aAddrCache[0]);
1097 PCRTNETADDRU pAddr = (PCRTNETADDRU)(pCache->pbEntries + iEntry * pCache->cbEntry);
1098 switch (enmAddrType)
1099 {
1100 case kIntNetAddrType_IPv4:
1101 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%#x MAC=%.6Rhxs IPv4 deleted #%d %RTnaipv4 %s\n",
1102 pIf->hIf, &pIf->MacAddr, iEntry, pAddr->IPv4, pszMsg));
1103 break;
1104 case kIntNetAddrType_IPv6:
1105 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%#x MAC=%.6Rhxs IPv6 deleted #%d %RTnaipv6 %s\n",
1106 pIf->hIf, &pIf->MacAddr, iEntry, &pAddr->IPv6, pszMsg));
1107 break;
1108 default:
1109 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%RX32 MAC=%.6Rhxs type=%d #%d %.*Rhxs %s\n",
1110 pIf->hIf, &pIf->MacAddr, enmAddrType, iEntry, pCache->cbAddress, pAddr, pszMsg));
1111 break;
1112 }
1113#endif
1114
1115 pCache->cEntries--;
1116 if (iEntry < pCache->cEntries)
1117 memmove(pCache->pbEntries + iEntry * pCache->cbEntry,
1118 pCache->pbEntries + (iEntry + 1) * pCache->cbEntry,
1119 (pCache->cEntries - iEntry) * pCache->cbEntry);
1120}
1121
1122
1123/**
1124 * Deletes an address from the cache, assuming it isn't actually in the cache.
1125 *
1126 * May or may not own the spinlock when calling this.
1127 *
1128 * @param pIf The interface (for logging).
1129 * @param pCache The cache.
1130 * @param pAddr The address.
1131 * @param cbAddr The address size (optimization).
1132 */
1133DECLINLINE(void) intnetR0IfAddrCacheDelete(PINTNETIF pIf, PINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr, const char *pszMsg)
1134{
1135 int i = intnetR0IfAddrCacheLookup(pCache, pAddr, cbAddr);
1136 if (RT_UNLIKELY(i >= 0))
1137 intnetR0IfAddrCacheDeleteIt(pIf, pCache, i, pszMsg);
1138}
1139
1140
1141/**
1142 * Deletes the address from all the interface caches.
1143 *
1144 * This is used to remove stale entries that has been reassigned to
1145 * other machines on the network.
1146 *
1147 * @param pNetwork The network.
1148 * @param pAddr The address.
1149 * @param enmType The address type.
1150 * @param cbAddr The address size (optimization).
1151 * @param pszMsg Log message.
1152 */
1153DECLINLINE(void) intnetR0NetworkAddrCacheDeleteLocked(PINTNETNETWORK pNetwork,
1154 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType,
1155 uint8_t const cbAddr,
1156 const char *pszMsg)
1157{
1158 uint32_t iIf = pNetwork->MacTab.cEntries;
1159 while (iIf--)
1160 {
1161 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1162
1163 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1164 if (RT_UNLIKELY(i >= 0))
1165 intnetR0IfAddrCacheDeleteIt(pIf, &pIf->aAddrCache[enmType], i, pszMsg);
1166 }
1167}
1168
1169
1170/**
1171 * Deletes the address from all the interface caches.
1172 *
1173 * This is used to remove stale entries that has been reassigned to
1174 * other machines on the network.
1175 *
1176 * @param pNetwork The network.
1177 * @param pAddr The address.
1178 * @param enmType The address type.
1179 * @param cbAddr The address size (optimization).
1180 * @param pszMsg Log message.
1181 */
1182DECLINLINE(void) intnetR0NetworkAddrCacheDelete(PINTNETNETWORK pNetwork, PCRTNETADDRU pAddr, INTNETADDRTYPE const enmType,
1183 uint8_t const cbAddr, const char *pszMsg)
1184{
1185 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1186
1187 intnetR0NetworkAddrCacheDeleteLocked(pNetwork, pAddr, enmType, cbAddr, pszMsg);
1188
1189 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1190}
1191
1192
1193/**
1194 * Deletes the address from all the interface caches except the specified one.
1195 *
1196 * This is used to remove stale entries that has been reassigned to
1197 * other machines on the network.
1198 *
1199 * @param pNetwork The network.
1200 * @param pAddr The address.
1201 * @param enmType The address type.
1202 * @param cbAddr The address size (optimization).
1203 * @param pszMsg Log message.
1204 */
1205DECLINLINE(void) intnetR0NetworkAddrCacheDeleteMinusIf(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, PCRTNETADDRU pAddr,
1206 INTNETADDRTYPE const enmType, uint8_t const cbAddr, const char *pszMsg)
1207{
1208 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1209
1210 uint32_t iIf = pNetwork->MacTab.cEntries;
1211 while (iIf--)
1212 {
1213 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1214 if (pIf != pIfSender)
1215 {
1216 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1217 if (RT_UNLIKELY(i >= 0))
1218 intnetR0IfAddrCacheDeleteIt(pIf, &pIf->aAddrCache[enmType], i, pszMsg);
1219 }
1220 }
1221
1222 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1223}
1224
1225
1226/**
1227 * Lookup an address on the network, returning the (first) interface having it
1228 * in its address cache.
1229 *
1230 * @returns Pointer to the interface on success, NULL if not found. The caller
1231 * must release the interface by calling intnetR0BusyDecIf.
1232 * @param pNetwork The network.
1233 * @param pAddr The address to lookup.
1234 * @param enmType The address type.
1235 * @param cbAddr The size of the address.
1236 */
1237DECLINLINE(PINTNETIF) intnetR0NetworkAddrCacheLookupIf(PINTNETNETWORK pNetwork, PCRTNETADDRU pAddr, INTNETADDRTYPE const enmType, uint8_t const cbAddr)
1238{
1239 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1240
1241 uint32_t iIf = pNetwork->MacTab.cEntries;
1242 while (iIf--)
1243 {
1244 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1245 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1246 if (i >= 0)
1247 {
1248 intnetR0BusyIncIf(pIf);
1249 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1250 return pIf;
1251 }
1252 }
1253
1254 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1255 return NULL;
1256}
1257
1258
1259/**
1260 * Look up specified address in the network's blacklist.
1261 *
1262 * @param pNetwork The network.
1263 * @param enmType The address type.
1264 * @param pAddr The address.
1265 */
1266static bool intnetR0NetworkBlacklistLookup(PINTNETNETWORK pNetwork,
1267 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
1268{
1269 PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
1270
1271 if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
1272 return false;
1273
1274 const uint8_t cbAddr = pCache->cbAddress;
1275 Assert(cbAddr == intnetR0AddrSize(enmType));
1276
1277 for (unsigned i = 0; i < pCache->cEntries; ++i)
1278 {
1279 uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1280 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
1281 return true;
1282 }
1283
1284 return false;
1285}
1286
1287
1288/**
1289 * Deletes specified address from network's blacklist.
1290 *
1291 * @param pNetwork The network.
1292 * @param enmType The address type.
1293 * @param pAddr The address.
1294 */
1295static void intnetR0NetworkBlacklistDelete(PINTNETNETWORK pNetwork,
1296 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
1297{
1298 PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
1299
1300 if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
1301 return;
1302
1303 const uint8_t cbAddr = pCache->cbAddress;
1304 Assert(cbAddr == intnetR0AddrSize(enmType));
1305
1306 for (unsigned i = 0; i < pCache->cEntries; ++i)
1307 {
1308 uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1309 if (!intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
1310 continue;
1311
1312 --pCache->cEntries;
1313 memmove(pCache->pbEntries + i * pCache->cbEntry,
1314 pCache->pbEntries + (i + 1) * pCache->cbEntry,
1315 (pCache->cEntries - i) * pCache->cbEntry);
1316 return;
1317 }
1318}
1319
1320
1321/**
1322 * Adds specified address from network's blacklist.
1323 *
1324 * @param pNetwork The network.
1325 * @param enmType The address type.
1326 * @param pAddr The address.
1327 */
1328static void intnetR0NetworkBlacklistAdd(PINTNETNETWORK pNetwork,
1329 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
1330{
1331 PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
1332
1333 if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
1334 return;
1335
1336 const uint8_t cbAddr = pCache->cbAddress;
1337 Assert(cbAddr == intnetR0AddrSize(enmType));
1338
1339 /* lookup */
1340 for (unsigned i = 0; i < pCache->cEntries; ++i)
1341 {
1342 uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1343 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1344 return; /* already exists */
1345 }
1346
1347 if (pCache->cEntries >= pCache->cEntriesAlloc)
1348 {
1349 /* shift */
1350 memmove(pCache->pbEntries, pCache->pbEntries + pCache->cbEntry,
1351 pCache->cbEntry * (pCache->cEntries - 1));
1352 --pCache->cEntries;
1353 }
1354
1355 Assert(pCache->cEntries < pCache->cEntriesAlloc);
1356
1357 /* push */
1358 uint8_t *pbEntry = pCache->pbEntries + pCache->cEntries * pCache->cbEntry;
1359 memcpy(pbEntry, pAddr, cbAddr);
1360 memset(pbEntry + pCache->cbAddress, '\0', pCache->cbEntry - cbAddr);
1361 ++pCache->cEntries;
1362
1363 Assert(pCache->cEntries <= pCache->cEntriesAlloc);
1364}
1365
1366
1367/**
1368 * Adds an address to the cache, the caller is responsible for making sure it's
1369 * not already in the cache.
1370 *
1371 * The caller must not
1372 *
1373 * @param pIf The interface (for logging).
1374 * @param pCache The address cache.
1375 * @param pAddr The address.
1376 * @param pszMsg log message.
1377 */
1378static void intnetR0IfAddrCacheAddIt(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
1379 const char *pszMsg)
1380{
1381 PINTNETNETWORK pNetwork = pIf->pNetwork;
1382 AssertReturnVoid(pNetwork);
1383
1384 PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
1385
1386 const uint8_t cbAddr = pCache->cbAddress;
1387 Assert(cbAddr == intnetR0AddrSize(enmAddrType));
1388
1389 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1390
1391 bool fBlacklisted = intnetR0NetworkBlacklistLookup(pNetwork, pAddr, enmAddrType);
1392 if (fBlacklisted)
1393 {
1394 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1395
1396#ifdef LOG_ENABLED
1397 switch (enmAddrType)
1398 {
1399 case kIntNetAddrType_IPv4:
1400 Log(("%s: spoofing attempt for %RTnaipv4\n",
1401 __FUNCTION__, pAddr->IPv4));
1402 break;
1403 case kIntNetAddrType_IPv6:
1404 Log(("%s: spoofing attempt for %RTnaipv6\n",
1405 __FUNCTION__, &pAddr->IPv6));
1406 break;
1407 default:
1408 Log(("%s: spoofing attempt for %.*Rhxs (type %d)\n",
1409 __FUNCTION__, cbAddr, pAddr, enmAddrType));
1410 break;
1411 }
1412#endif
1413 return;
1414 }
1415
1416 if (RT_UNLIKELY(!pCache->cEntriesAlloc))
1417 {
1418 /* This shouldn't happen*/
1419 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1420 return;
1421 }
1422
1423 /* When the table is full, drop the older entry (FIFO). Do proper ageing? */
1424 if (pCache->cEntries >= pCache->cEntriesAlloc)
1425 {
1426 Log(("intnetR0IfAddrCacheAddIt: type=%d replacing %.*Rhxs\n",
1427 (int)(uintptr_t)(pCache - &pIf->aAddrCache[0]), pCache->cbAddress, pCache->pbEntries));
1428 memmove(pCache->pbEntries, pCache->pbEntries + pCache->cbEntry, pCache->cbEntry * (pCache->cEntries - 1));
1429 pCache->cEntries--;
1430 Assert(pCache->cEntries < pCache->cEntriesAlloc);
1431 }
1432
1433 /*
1434 * Add the new entry to the end of the array.
1435 */
1436 uint8_t *pbEntry = pCache->pbEntries + pCache->cEntries * pCache->cbEntry;
1437 memcpy(pbEntry, pAddr, pCache->cbAddress);
1438 memset(pbEntry + pCache->cbAddress, '\0', pCache->cbEntry - pCache->cbAddress);
1439
1440#ifdef LOG_ENABLED
1441 switch (enmAddrType)
1442 {
1443 case kIntNetAddrType_IPv4:
1444 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs IPv4 added #%d %RTnaipv4 %s\n",
1445 pIf->hIf, &pIf->MacAddr, pCache->cEntries, pAddr->IPv4, pszMsg));
1446 break;
1447 case kIntNetAddrType_IPv6:
1448 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs IPv6 added #%d %RTnaipv6 %s\n",
1449 pIf->hIf, &pIf->MacAddr, pCache->cEntries, &pAddr->IPv6, pszMsg));
1450 break;
1451 default:
1452 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs type=%d added #%d %.*Rhxs %s\n",
1453 pIf->hIf, &pIf->MacAddr, enmAddrType, pCache->cEntries, pCache->cbAddress, pAddr, pszMsg));
1454 break;
1455 }
1456#endif
1457 pCache->cEntries++;
1458 Assert(pCache->cEntries <= pCache->cEntriesAlloc);
1459
1460 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1461}
1462
1463
1464/**
1465 * A intnetR0IfAddrCacheAdd worker that performs the rest of the lookup.
1466 *
1467 * @param pIf The interface (for logging).
1468 * @param pCache The address cache.
1469 * @param pAddr The address.
1470 * @param cbAddr The size of the address (optimization).
1471 * @param pszMsg Log message.
1472 */
1473static void intnetR0IfAddrCacheAddSlow(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
1474 const char *pszMsg)
1475{
1476 PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
1477
1478 const uint8_t cbAddr = pCache->cbAddress;
1479 Assert(cbAddr == intnetR0AddrSize(enmAddrType));
1480
1481 /*
1482 * Check all but the first and last entries, the caller
1483 * has already checked those.
1484 */
1485 int i = pCache->cEntries - 2;
1486 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry;
1487 while (i >= 1)
1488 {
1489 if (RT_LIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1490 return;
1491 pbEntry += pCache->cbEntry;
1492 i--;
1493 }
1494
1495 /*
1496 * Not found, add it.
1497 */
1498 intnetR0IfAddrCacheAddIt(pIf, enmAddrType, pAddr, pszMsg);
1499}
1500
1501
1502/**
1503 * Adds an address to the cache if it's not already there.
1504 *
1505 * Must not own any spinlocks when calling this function.
1506 *
1507 * @param pIf The interface (for logging).
1508 * @param pCache The address cache.
1509 * @param pAddr The address.
1510 * @param cbAddr The size of the address (optimization).
1511 * @param pszMsg Log message.
1512 */
1513DECLINLINE(void) intnetR0IfAddrCacheAdd(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
1514 const char *pszMsg)
1515{
1516 PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
1517
1518 const uint8_t cbAddr = pCache->cbAddress;
1519 Assert(cbAddr == intnetR0AddrSize(enmAddrType));
1520
1521 /*
1522 * The optimized case is when the address the first or last cache entry.
1523 */
1524 unsigned i = pCache->cEntries;
1525 if (RT_LIKELY( i > 0
1526 && ( intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr)
1527 || (i > 1
1528 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * (i-1)), pAddr, cbAddr))) ))
1529 return;
1530
1531 intnetR0IfAddrCacheAddSlow(pIf, enmAddrType, pAddr, pszMsg);
1532}
1533
1534
1535/**
1536 * Destroys the specified address cache.
1537 * @param pCache The address cache.
1538 */
1539static void intnetR0IfAddrCacheDestroy(PINTNETADDRCACHE pCache)
1540{
1541 void *pvFree = pCache->pbEntries;
1542 pCache->pbEntries = NULL;
1543 pCache->cEntries = 0;
1544 pCache->cEntriesAlloc = 0;
1545 RTMemFree(pvFree);
1546}
1547
1548
1549/**
1550 * Initialize the address cache for the specified address type.
1551 *
1552 * The cache storage is preallocated and fixed size so that we can handle
1553 * inserts from problematic contexts.
1554 *
1555 * @returns VINF_SUCCESS or VERR_NO_MEMORY.
1556 * @param pCache The cache to initialize.
1557 * @param enmAddrType The address type.
1558 * @param fEnabled Whether the address cache is enabled or not.
1559 */
1560static int intnetR0IfAddrCacheInit(PINTNETADDRCACHE pCache, INTNETADDRTYPE enmAddrType, bool fEnabled)
1561{
1562 pCache->cEntries = 0;
1563 pCache->cbAddress = intnetR0AddrSize(enmAddrType);
1564 pCache->cbEntry = RT_ALIGN(pCache->cbAddress, 4);
1565 if (fEnabled)
1566 {
1567 pCache->cEntriesAlloc = 32;
1568 pCache->pbEntries = (uint8_t *)RTMemAllocZ(pCache->cEntriesAlloc * pCache->cbEntry);
1569 if (!pCache->pbEntries)
1570 return VERR_NO_MEMORY;
1571 }
1572 else
1573 {
1574 pCache->cEntriesAlloc = 0;
1575 pCache->pbEntries = NULL;
1576 }
1577 return VINF_SUCCESS;
1578}
1579
1580
1581/**
1582 * Is it a multicast or broadcast MAC address?
1583 *
1584 * @returns true if multicast, false if not.
1585 * @param pMacAddr The address to inspect.
1586 */
1587DECL_FORCE_INLINE(bool) intnetR0IsMacAddrMulticast(PCRTMAC pMacAddr)
1588{
1589 return !!(pMacAddr->au8[0] & 0x01);
1590}
1591
1592
1593/**
1594 * Is it a dummy MAC address?
1595 *
1596 * We use dummy MAC addresses for interfaces which we don't know the MAC
1597 * address of because they haven't sent anything (learning) or explicitly set
1598 * it.
1599 *
1600 * @returns true if dummy, false if not.
1601 * @param pMacAddr The address to inspect.
1602 */
1603DECL_FORCE_INLINE(bool) intnetR0IsMacAddrDummy(PCRTMAC pMacAddr)
1604{
1605 /* The dummy address are broadcast addresses, don't bother check it all. */
1606 return pMacAddr->au16[0] == 0xffff;
1607}
1608
1609
1610/**
1611 * Compares two MAC addresses.
1612 *
1613 * @returns true if equal, false if not.
1614 * @param pDstAddr1 Address 1.
1615 * @param pDstAddr2 Address 2.
1616 */
1617DECL_FORCE_INLINE(bool) intnetR0AreMacAddrsEqual(PCRTMAC pDstAddr1, PCRTMAC pDstAddr2)
1618{
1619 return pDstAddr1->au16[2] == pDstAddr2->au16[2]
1620 && pDstAddr1->au16[1] == pDstAddr2->au16[1]
1621 && pDstAddr1->au16[0] == pDstAddr2->au16[0];
1622}
1623
1624
1625/**
1626 * Switch a unicast frame based on the network layer address (OSI level 3) and
1627 * return a destination table.
1628 *
 * The whole table walk is done while holding pNetwork->hAddrSpinlock.  Every
 * interface put into the destination table has had intnetR0BusyIncIf called
 * on it, and intnetR0BusyIncTrunk is called on the trunk when fTrunkDst ends
 * up non-zero; presumably the consumer of the table undoes these.
 *
 * The return value follows from the table: interfaces without trunk gives
 * INTNETSWDECISION_INTNET, interfaces with trunk INTNETSWDECISION_BROADCAST,
 * trunk only INTNETSWDECISION_TRUNK and nothing at all INTNETSWDECISION_DROP.
 *
1629 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1630 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1631 * @param pNetwork The network to switch on.
1632 * @param pDstMacAddr The destination MAC address.
1633 * @param enmL3AddrType The level-3 destination address type.
1634 * @param pL3Addr The level-3 destination address.
1635 * @param cbL3Addr The size of the level-3 destination address.
1636 * @param fSrc The frame source (INTNETTRUNKDIR_WIRE).
1637 * @param pDstTab The destination output table.
1638 */
1639static INTNETSWDECISION intnetR0NetworkSwitchLevel3(PINTNETNETWORK pNetwork, PCRTMAC pDstMacAddr,
1640 INTNETADDRTYPE enmL3AddrType, PCRTNETADDRU pL3Addr, uint8_t cbL3Addr,
1641 uint32_t fSrc, PINTNETDSTTAB pDstTab)
1642{
1643 Assert(fSrc == INTNETTRUNKDIR_WIRE);
1644
1645 /*
1646 * Grab the spinlock first and do the switching.
1647 */
1648 PINTNETMACTAB pTab = &pNetwork->MacTab;
1649 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1650
 /* Start out with an empty destination table. */
1651 pDstTab->fTrunkDst = 0;
1652 pDstTab->pTrunk = 0;
1653 pDstTab->cIfs = 0;
1654
1655 /* Find exactly matching or promiscuous interfaces. */
1656 uint32_t cExactHits = 0;
1657 uint32_t iIfMac = pTab->cEntries;
1658 while (iIfMac-- > 0)
1659 {
1660 if (pTab->paEntries[iIfMac].fActive)
1661 {
1662 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
 /* "Exact" here means the L3 address is in the interface's address cache. */
1663 bool fExact = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmL3AddrType], pL3Addr, cbL3Addr) >= 0;
1664 if (fExact || pTab->paEntries[iIfMac].fPromiscuousSeeTrunk)
1665 {
1666 cExactHits += fExact;
1667
1668 uint32_t iIfDst = pDstTab->cIfs++;
1669 pDstTab->aIfs[iIfDst].pIf = pIf;
1670 pDstTab->aIfs[iIfDst].fReplaceDstMac = fExact;
1671 intnetR0BusyIncIf(pIf);
1672
 /* From here on the host check below compares against this interface's MAC. */
1673 if (fExact)
1674 pDstMacAddr = &pIf->MacAddr; /* Avoids duplicates being sent to the host. */
1675 }
1676 }
1677 }
1678
1679 /* Network only promiscuous mode ifs should see related trunk traffic. */
 /* Second pass: only taken when the first pass had an exact hit.  It adds the
    active, effectively promiscuous interfaces without fPromiscuousSeeTrunk
    which do not have the address in their cache. */
1680 if ( cExactHits
1681 && fSrc
1682 && pNetwork->MacTab.cPromiscuousNoTrunkEntries)
1683 {
1684 iIfMac = pTab->cEntries;
1685 while (iIfMac-- > 0)
1686 {
1687 if ( pTab->paEntries[iIfMac].fActive
1688 && pTab->paEntries[iIfMac].fPromiscuousEff
1689 && !pTab->paEntries[iIfMac].fPromiscuousSeeTrunk)
1690 {
1691 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1692 if (intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmL3AddrType], pL3Addr, cbL3Addr) < 0)
1693 {
1694 uint32_t iIfDst = pDstTab->cIfs++;
1695 pDstTab->aIfs[iIfDst].pIf = pIf;
1696 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1697 intnetR0BusyIncIf(pIf);
1698 }
1699 }
1700 }
1701 }
1702
1703 /* Does it match the host, or is the host promiscuous? */
1704 if (pTab->fHostActive)
1705 {
1706 bool fExact = intnetR0AreMacAddrsEqual(&pTab->HostMac, pDstMacAddr);
1707 if ( fExact
1708 || intnetR0IsMacAddrDummy(&pTab->HostMac)
1709 || pTab->fHostPromiscuousEff)
1710 {
1711 cExactHits += fExact;
1712 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1713 }
1714 }
1715
1716 /* Hit the wire if there are no exact matches or if it's in promiscuous mode. */
1717 if (pTab->fWireActive && (!cExactHits || pTab->fWirePromiscuousEff))
1718 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
 /* Never send the frame back in the direction it came from. */
1719 pDstTab->fTrunkDst &= ~fSrc;
1720 if (pDstTab->fTrunkDst)
1721 {
1722 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1723 pDstTab->pTrunk = pTrunk;
1724 intnetR0BusyIncTrunk(pTrunk);
1725 }
1726
1727 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1728 return pDstTab->cIfs
1729 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST)
1730 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK);
1731}
1732
1733
1734/**
1735 * Pre-switch a unicast MAC address.
1736 *
 * No destination table is built here; the MAC table is only inspected (under
 * pNetwork->hAddrSpinlock) to come up with a decision.  The code below only
 * ever produces INTNETSWDECISION_BROADCAST (the default) or
 * INTNETSWDECISION_INTNET.
 *
1737 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1738 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1739 * @param pNetwork The network to switch on.
1740 * @param fSrc The frame source.
1741 * @param pSrcAddr The source address of the frame.
 *                 May be NULL, in which case the source check is skipped.
1742 * @param pDstAddr The destination address of the frame.
1743 */
1744static INTNETSWDECISION intnetR0NetworkPreSwitchUnicast(PINTNETNETWORK pNetwork, uint32_t fSrc, PCRTMAC pSrcAddr,
1745 PCRTMAC pDstAddr)
1746{
1747 Assert(!intnetR0IsMacAddrMulticast(pDstAddr));
1748 Assert(fSrc);
1749
1750 /*
1751 * Grab the spinlock first and do the switching.
1752 */
1753 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_BROADCAST;
1754 PINTNETMACTAB pTab = &pNetwork->MacTab;
1755 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1756
1757 /* Iterate the internal network interfaces and look for matching source and
1758 destination addresses. */
 /* The walk goes from the last entry to the first and stops at the first
    active entry that settles the matter; inactive entries are skipped. */
1759 uint32_t iIfMac = pTab->cEntries;
1760 while (iIfMac-- > 0)
1761 {
1762 if (pTab->paEntries[iIfMac].fActive)
1763 {
1764 /* Unknown interface address? */
 /* (Stops the walk, leaving the default broadcast decision.) */
1765 if (intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr))
1766 break;
1767
1768 /* Paranoia - this shouldn't happen, right? */
 /* (An interface using the frame's source MAC also stops the walk.) */
1769 if ( pSrcAddr
1770 && intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pSrcAddr))
1771 break;
1772
1773 /* Exact match? */
 /* (Still broadcast if the host is effectively promiscuous and the frame
    came from the wire; otherwise internal network only.) */
1774 if (intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr))
1775 {
1776 enmSwDecision = pTab->fHostPromiscuousEff && fSrc == INTNETTRUNKDIR_WIRE
1777 ? INTNETSWDECISION_BROADCAST
1778 : INTNETSWDECISION_INTNET;
1779 break;
1780 }
1781 }
1782 }
1783
1784 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1785 return enmSwDecision;
1786}
1787
1788
1789/**
1790 * Switch a unicast MAC address and return a destination table.
1791 *
 * The whole table walk is done while holding pNetwork->hAddrSpinlock.  Every
 * interface put into the destination table has had intnetR0BusyIncIf called
 * on it, and intnetR0BusyIncTrunk is called on the trunk when fTrunkDst ends
 * up non-zero; presumably the consumer of the table undoes these.
 *
 * The return value follows from the table: interfaces without trunk gives
 * INTNETSWDECISION_INTNET, interfaces with trunk INTNETSWDECISION_BROADCAST,
 * trunk only INTNETSWDECISION_TRUNK and nothing at all INTNETSWDECISION_DROP.
 *
1792 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1793 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1794 * @param pNetwork The network to switch on.
1795 * @param fSrc The frame source.
1796 * @param pIfSender The sender interface, NULL if trunk. Used to
1797 * prevent sending an echo to the sender.
1798 * @param pDstAddr The destination address of the frame.
1799 * @param pDstTab The destination output table.
1800 */
1801static INTNETSWDECISION intnetR0NetworkSwitchUnicast(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETIF pIfSender,
1802 PCRTMAC pDstAddr, PINTNETDSTTAB pDstTab)
1803{
1804 AssertPtr(pDstTab);
1805 Assert(!intnetR0IsMacAddrMulticast(pDstAddr));
1806
1807 /*
1808 * Grab the spinlock first and do the switching.
1809 */
1810 PINTNETMACTAB pTab = &pNetwork->MacTab;
1811 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1812
 /* Start out with an empty destination table. */
1813 pDstTab->fTrunkDst = 0;
1814 pDstTab->pTrunk = 0;
1815 pDstTab->cIfs = 0;
1816
1817 /* Find exactly matching or promiscuous interfaces. */
 /* An active entry qualifies on an exact MAC match, on a dummy (unknown) MAC,
    on fPromiscuousSeeTrunk, or on fPromiscuousEff when fSrc is zero. */
1818 uint32_t cExactHits = 0;
1819 uint32_t iIfMac = pTab->cEntries;
1820 while (iIfMac-- > 0)
1821 {
1822 if (pTab->paEntries[iIfMac].fActive)
1823 {
1824 bool fExact = intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr);
1825 if ( fExact
1826 || intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr)
1827 || ( pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
1828 || (!fSrc && pTab->paEntries[iIfMac].fPromiscuousEff) )
1829 )
1830 {
 /* Note: the hit is counted even when the entry turns out to be the sender. */
1831 cExactHits += fExact;
1832
1833 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1834 if (RT_LIKELY(pIf != pIfSender)) /* paranoia */
1835 {
1836 uint32_t iIfDst = pDstTab->cIfs++;
1837 pDstTab->aIfs[iIfDst].pIf = pIf;
1838 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1839 intnetR0BusyIncIf(pIf);
1840 }
1841 }
1842 }
1843 }
1844
1845 /* Network only promiscuous mode ifs should see related trunk traffic. */
 /* Second pass: only taken for frames with a non-zero fSrc when the first pass
    had an exact hit.  The MAC conditions exclude entries with a matching or
    dummy MAC, which the first pass already selects. */
1846 if ( cExactHits
1847 && fSrc
1848 && pNetwork->MacTab.cPromiscuousNoTrunkEntries)
1849 {
1850 iIfMac = pTab->cEntries;
1851 while (iIfMac-- > 0)
1852 {
1853 if ( pTab->paEntries[iIfMac].fPromiscuousEff
1854 && !pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
1855 && pTab->paEntries[iIfMac].fActive
1856 && !intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr)
1857 && !intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr) )
1858 {
1859 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1860 uint32_t iIfDst = pDstTab->cIfs++;
1861 pDstTab->aIfs[iIfDst].pIf = pIf;
1862 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1863 intnetR0BusyIncIf(pIf);
1864 }
1865 }
1866 }
1867
1868 /* Does it match the host, or is the host promiscuous? */
 /* (Skipped for frames that came from the host.) */
1869 if ( fSrc != INTNETTRUNKDIR_HOST
1870 && pTab->fHostActive)
1871 {
1872 bool fExact = intnetR0AreMacAddrsEqual(&pTab->HostMac, pDstAddr);
1873 if ( fExact
1874 || intnetR0IsMacAddrDummy(&pTab->HostMac)
1875 || pTab->fHostPromiscuousEff)
1876 {
1877 cExactHits += fExact;
1878 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1879 }
1880 }
1881
1882 /* Hit the wire if there are no exact matches or if it's in promiscuous mode. */
 /* (Skipped for frames that came from the wire.) */
1883 if ( fSrc != INTNETTRUNKDIR_WIRE
1884 && pTab->fWireActive
1885 && (!cExactHits || pTab->fWirePromiscuousEff)
1886 )
1887 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1888
1889 /* Grab the trunk if we're sending to it. */
1890 if (pDstTab->fTrunkDst)
1891 {
1892 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1893 pDstTab->pTrunk = pTrunk;
1894 intnetR0BusyIncTrunk(pTrunk);
1895 }
1896
1897 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1898 return pDstTab->cIfs
1899 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST)
1900 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK);
1901}
1902
1903
1904/**
1905 * Create a destination table for a broadcast frame.
1906 *
1907 * @returns INTNETSWDECISION_BROADCAST.
1908 * @param pNetwork The network to switch on.
1909 * @param fSrc The frame source.
1910 * @param pIfSender The sender interface, NULL if trunk. Used to
1911 * prevent sending an echo to the sender.
1912 * @param pDstTab The destination output table.
1913 */
1914static INTNETSWDECISION intnetR0NetworkSwitchBroadcast(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETIF pIfSender,
1915 PINTNETDSTTAB pDstTab)
1916{
1917 AssertPtr(pDstTab);
1918
1919 /*
1920 * Grab the spinlock first and record all active interfaces.
1921 */
1922 PINTNETMACTAB pTab = &pNetwork->MacTab;
1923 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1924
1925 pDstTab->fTrunkDst = 0;
1926 pDstTab->pTrunk = 0;
1927 pDstTab->cIfs = 0;
1928
1929 /* Regular interfaces. */
1930 uint32_t iIfMac = pTab->cEntries;
1931 while (iIfMac-- > 0)
1932 {
1933 if (pTab->paEntries[iIfMac].fActive)
1934 {
1935 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1936 if (pIf != pIfSender)
1937 {
1938 uint32_t iIfDst = pDstTab->cIfs++;
1939 pDstTab->aIfs[iIfDst].pIf = pIf;
1940 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1941 intnetR0BusyIncIf(pIf);
1942 }
1943 }
1944 }
1945
1946 /* The trunk interface. */
1947 if (pTab->fHostActive)
1948 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1949 if (pTab->fWireActive)
1950 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1951 pDstTab->fTrunkDst &= ~fSrc;
1952 if (pDstTab->fTrunkDst)
1953 {
1954 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1955 pDstTab->pTrunk = pTrunk;
1956 intnetR0BusyIncTrunk(pTrunk);
1957 }
1958
1959 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1960 return INTNETSWDECISION_BROADCAST;
1961}
1962
1963
1964/**
1965 * Create a destination table with the trunk and any promiscuous interfaces.
1966 *
1967 * This is only used in a fallback case of the level-3 switching, so we can
1968 * assume the wire as source and skip the sender interface filtering.
1969 *
1970 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1971 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1972 * @param pNetwork The network to switch on.
1973 * @param fSrc The frame source.
1974 * @param pDstTab The destination output table.
1975 */
1976static INTNETSWDECISION intnetR0NetworkSwitchTrunkAndPromisc(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETDSTTAB pDstTab)
1977{
1978 Assert(fSrc == INTNETTRUNKDIR_WIRE);
1979
1980 /*
1981 * Grab the spinlock first and do the switching.
1982 */
1983 PINTNETMACTAB pTab = &pNetwork->MacTab;
1984 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1985
1986 pDstTab->fTrunkDst = 0;
1987 pDstTab->pTrunk = 0;
1988 pDstTab->cIfs = 0;
1989
1990 /* Find promiscuous interfaces. */
1991 uint32_t iIfMac = pTab->cEntries;
1992 while (iIfMac-- > 0)
1993 {
1994 if ( pTab->paEntries[iIfMac].fActive
1995 && ( pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
1996 || (!fSrc && pTab->paEntries[iIfMac].fPromiscuousEff) )
1997 )
1998 {
1999 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
2000 uint32_t iIfDst = pDstTab->cIfs++;
2001 pDstTab->aIfs[iIfDst].pIf = pIf;
2002 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
2003 intnetR0BusyIncIf(pIf);
2004 }
2005 }
2006
2007 /* The trunk interface. */
2008 if (pTab->fHostActive)
2009 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
2010 if (pTab->fWireActive)
2011 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
2012 pDstTab->fTrunkDst &= ~fSrc;
2013 if (pDstTab->fTrunkDst)
2014 {
2015 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
2016 pDstTab->pTrunk = pTrunk;
2017 intnetR0BusyIncTrunk(pTrunk);
2018 }
2019
2020 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2021 return !pDstTab->cIfs
2022 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK)
2023 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST);
2024}
2025
2026
2027/**
2028 * Create a destination table for a trunk frame.
2029 *
2030 * @returns INTNETSWDECISION_BROADCAST.
2031 * @param pNetwork The network to switch on.
2032 * @param fSrc The frame source.
2033 * @param pDstTab The destination output table.
2034 */
2035static INTNETSWDECISION intnetR0NetworkSwitchTrunk(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETDSTTAB pDstTab)
2036{
2037 AssertPtr(pDstTab);
2038
2039 /*
2040 * Grab the spinlock first and record all active interfaces.
2041 */
2042 PINTNETMACTAB pTab= &pNetwork->MacTab;
2043 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2044
2045 pDstTab->fTrunkDst = 0;
2046 pDstTab->pTrunk = 0;
2047 pDstTab->cIfs = 0;
2048
2049 /* The trunk interface. */
2050 if (pTab->fHostActive)
2051 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
2052 if (pTab->fWireActive)
2053 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
2054 pDstTab->fTrunkDst &= ~fSrc;
2055 if (pDstTab->fTrunkDst)
2056 {
2057 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
2058 pDstTab->pTrunk = pTrunk;
2059 intnetR0BusyIncTrunk(pTrunk);
2060 }
2061
2062 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2063 return pDstTab->fTrunkDst ? INTNETSWDECISION_TRUNK : INTNETSWDECISION_DROP;
2064}
2065
2066
2067/**
2068 * Wrapper around RTMemAlloc for allocating a destination table.
2069 *
2070 * @returns VINF_SUCCESS or VERR_NO_MEMORY.
2071 * @param cEntries The size given as an entry count.
2072 * @param ppDstTab Where to store the pointer (always).
2073 */
2074DECLINLINE(int) intnetR0AllocDstTab(uint32_t cEntries, PINTNETDSTTAB *ppDstTab)
2075{
2076 PINTNETDSTTAB pDstTab;
2077 *ppDstTab = pDstTab = (PINTNETDSTTAB)RTMemAlloc(RT_OFFSETOF(INTNETDSTTAB, aIfs[cEntries]));
2078 if (RT_UNLIKELY(!pDstTab))
2079 return VERR_NO_MEMORY;
2080 return VINF_SUCCESS;
2081}
2082
2083
2084/**
2085 * Ensures that there is space for another interface in the MAC address lookup
2086 * table as well as all the destination tables.
2087 *
2088 * The caller must own the create/open/destroy mutex.
2089 *
2090 * @returns VINF_SUCCESS, VERR_NO_MEMORY or VERR_OUT_OF_RANGE.
2091 * @param pNetwork The network to operate on.
2092 */
2093static int intnetR0NetworkEnsureTabSpace(PINTNETNETWORK pNetwork)
2094{
2095 /*
2096 * The cEntries and cEntriesAllocated members are only updated while
2097 * owning the big mutex, so we only need the spinlock when doing the
2098 * actual table replacing.
2099 */
2100 PINTNETMACTAB pTab = &pNetwork->MacTab;
2101 int rc = VINF_SUCCESS;
2102 AssertReturn(pTab->cEntries <= pTab->cEntriesAllocated, VERR_INTERNAL_ERROR_2);
2103 if (pTab->cEntries + 1 > pTab->cEntriesAllocated)
2104 {
2105 uint32_t const cAllocated = pTab->cEntriesAllocated + INTNET_GROW_DSTTAB_SIZE;
2106 if (cAllocated <= INTNET_MAX_IFS)
2107 {
2108 /*
2109 * Resize the destination tables first, this can be kind of tedious.
2110 */
2111 for (uint32_t i = 0; i < pTab->cEntries; i++)
2112 {
2113 PINTNETIF pIf = pTab->paEntries[i].pIf; AssertPtr(pIf);
2114 PINTNETDSTTAB pNew;
2115 rc = intnetR0AllocDstTab(cAllocated, &pNew);
2116 if (RT_FAILURE(rc))
2117 break;
2118
2119 for (;;)
2120 {
2121 PINTNETDSTTAB pOld = pIf->pDstTab;
2122 if ( pOld
2123 && ASMAtomicCmpXchgPtr(&pIf->pDstTab, pNew, pOld))
2124 {
2125 RTMemFree(pOld);
2126 break;
2127 }
2128 intnetR0BusyWait(pNetwork, &pIf->cBusy);
2129 }
2130 }
2131
2132 /*
2133 * The trunk.
2134 */
2135 if ( RT_SUCCESS(rc)
2136 && pNetwork->MacTab.pTrunk)
2137 {
2138 AssertCompileAdjacentMembers(INTNETTRUNKIF, apTaskDstTabs, apIntDstTabs);
2139 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
2140 PINTNETDSTTAB * const ppEndDstTab = &pTrunk->apIntDstTabs[pTrunk->cIntDstTabs];
2141 for (PINTNETDSTTAB *ppDstTab = &pTrunk->apTaskDstTabs[0];
2142 ppDstTab != ppEndDstTab && RT_SUCCESS(rc);
2143 ppDstTab++)
2144 {
2145 PINTNETDSTTAB pNew;
2146 rc = intnetR0AllocDstTab(cAllocated, &pNew);
2147 if (RT_FAILURE(rc))
2148 break;
2149
2150 for (;;)
2151 {
2152 RTSpinlockAcquire(pTrunk->hDstTabSpinlock);
2153 void *pvOld = *ppDstTab;
2154 if (pvOld)
2155 *ppDstTab = pNew;
2156 RTSpinlockRelease(pTrunk->hDstTabSpinlock);
2157 if (pvOld)
2158 {
2159 RTMemFree(pvOld);
2160 break;
2161 }
2162 intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
2163 }
2164 }
2165 }
2166
2167 /*
2168 * The MAC Address table itself.
2169 */
2170 if (RT_SUCCESS(rc))
2171 {
2172 PINTNETMACTABENTRY paNew = (PINTNETMACTABENTRY)RTMemAlloc(sizeof(INTNETMACTABENTRY) * cAllocated);
2173 if (paNew)
2174 {
2175 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2176
2177 PINTNETMACTABENTRY paOld = pTab->paEntries;
2178 uint32_t i = pTab->cEntries;
2179 while (i-- > 0)
2180 {
2181 paNew[i] = paOld[i];
2182
2183 paOld[i].fActive = false;
2184 paOld[i].pIf = NULL;
2185 }
2186
2187 pTab->paEntries = paNew;
2188 pTab->cEntriesAllocated = cAllocated;
2189
2190 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2191
2192 RTMemFree(paOld);
2193 }
2194 else
2195 rc = VERR_NO_MEMORY;
2196 }
2197 }
2198 else
2199 rc = VERR_OUT_OF_RANGE;
2200 }
2201 return rc;
2202}
2203
2204
2205
2206
2207#ifdef INTNET_WITH_DHCP_SNOOPING
2208
2209/**
2210 * Snoops IP assignments and releases from the DHCPv4 traffic.
2211 *
2212 * The caller is responsible for making sure this traffic between the
2213 * BOOTPS and BOOTPC ports and validate the IP header. The UDP packet
2214 * need not be validated beyond the ports.
2215 *
2216 * @param pNetwork The network this frame was seen on.
2217 * @param pIpHdr Pointer to a valid IP header. This is for pseudo
2218 * header validation, so only the minimum header size
2219 * needs to be available and valid here.
2220 * @param pUdpHdr Pointer to the UDP header in the frame.
2221 * @param cbUdpPkt What's left of the frame when starting at the UDP header.
2222 * @param fGso Set if this is a GSO frame, clear if regular.
2223 */
2224static void intnetR0NetworkSnoopDhcp(PINTNETNETWORK pNetwork, PCRTNETIPV4 pIpHdr, PCRTNETUDP pUdpHdr, uint32_t cbUdpPkt)
2225{
2226 /*
2227 * Check if the DHCP message is valid and get the type.
2228 */
2229 if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbUdpPkt, true /*fCheckSum*/))
2230 {
2231 Log6(("Bad UDP packet\n"));
2232 return;
2233 }
2234 PCRTNETBOOTP pDhcp = (PCRTNETBOOTP)(pUdpHdr + 1);
2235 uint8_t MsgType;
2236 if (!RTNetIPv4IsDHCPValid(pUdpHdr, pDhcp, cbUdpPkt - sizeof(*pUdpHdr), &MsgType))
2237 {
2238 Log6(("Bad DHCP packet\n"));
2239 return;
2240 }
2241
2242#ifdef LOG_ENABLED
2243 /*
2244 * Log it.
2245 */
2246 const char *pszType = "unknown";
2247 switch (MsgType)
2248 {
2249 case RTNET_DHCP_MT_DISCOVER: pszType = "discover"; break;
2250 case RTNET_DHCP_MT_OFFER: pszType = "offer"; break;
2251 case RTNET_DHCP_MT_REQUEST: pszType = "request"; break;
2252 case RTNET_DHCP_MT_DECLINE: pszType = "decline"; break;
2253 case RTNET_DHCP_MT_ACK: pszType = "ack"; break;
2254 case RTNET_DHCP_MT_NAC: pszType = "nac"; break;
2255 case RTNET_DHCP_MT_RELEASE: pszType = "release"; break;
2256 case RTNET_DHCP_MT_INFORM: pszType = "inform"; break;
2257 }
2258 Log6(("DHCP msg: %d (%s) client %.6Rhxs ciaddr=%d.%d.%d.%d yiaddr=%d.%d.%d.%d\n", MsgType, pszType, &pDhcp->bp_chaddr,
2259 pDhcp->bp_ciaddr.au8[0], pDhcp->bp_ciaddr.au8[1], pDhcp->bp_ciaddr.au8[2], pDhcp->bp_ciaddr.au8[3],
2260 pDhcp->bp_yiaddr.au8[0], pDhcp->bp_yiaddr.au8[1], pDhcp->bp_yiaddr.au8[2], pDhcp->bp_yiaddr.au8[3]));
2261#endif /* LOG_EANBLED */
2262
2263 /*
2264 * Act upon the message.
2265 */
2266 switch (MsgType)
2267 {
2268#if 0
2269 case RTNET_DHCP_MT_REQUEST:
2270 /** @todo Check for valid non-broadcast requests w/ IP for any of the MACs we
2271 * know, and add the IP to the cache. */
2272 break;
2273#endif
2274
2275
2276 /*
2277 * Lookup the interface by its MAC address and insert the IPv4 address into the cache.
2278 * Delete the old client address first, just in case it changed in a renewal.
2279 */
2280 case RTNET_DHCP_MT_ACK:
2281 if (intnetR0IPv4AddrIsGood(pDhcp->bp_yiaddr))
2282 {
2283 PINTNETIF pMatchingIf = NULL;
2284 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2285
2286 uint32_t iIf = pNetwork->MacTab.cEntries;
2287 while (iIf-- > 0)
2288 {
2289 PINTNETIF pCur = pNetwork->MacTab.paEntries[iIf].pIf;
2290 if ( intnetR0IfHasMacAddr(pCur)
2291 && !memcmp(&pCur->MacAddr, &pDhcp->bp_chaddr, sizeof(RTMAC)))
2292 {
2293 intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
2294 (PCRTNETADDRU)&pDhcp->bp_ciaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_ACK");
2295 if (!pMatchingIf)
2296 {
2297 pMatchingIf = pCur;
2298 intnetR0BusyIncIf(pMatchingIf);
2299 }
2300 }
2301 }
2302
2303 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2304
2305 if (pMatchingIf)
2306 {
2307 intnetR0IfAddrCacheAdd(pMatchingIf, kIntNetAddrType_IPv4,
2308 (PCRTNETADDRU)&pDhcp->bp_yiaddr, "DHCP_MT_ACK");
2309 intnetR0BusyDecIf(pMatchingIf);
2310 }
2311 }
2312 return;
2313
2314
2315 /*
2316 * Lookup the interface by its MAC address and remove the IPv4 address(es) from the cache.
2317 */
2318 case RTNET_DHCP_MT_RELEASE:
2319 {
2320 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2321
2322 uint32_t iIf = pNetwork->MacTab.cEntries;
2323 while (iIf-- > 0)
2324 {
2325 PINTNETIF pCur = pNetwork->MacTab.paEntries[iIf].pIf;
2326 if ( intnetR0IfHasMacAddr(pCur)
2327 && !memcmp(&pCur->MacAddr, &pDhcp->bp_chaddr, sizeof(RTMAC)))
2328 {
2329 intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
2330 (PCRTNETADDRU)&pDhcp->bp_ciaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_RELEASE");
2331 intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
2332 (PCRTNETADDRU)&pDhcp->bp_yiaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_RELEASE");
2333 }
2334 }
2335
2336 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2337 break;
2338 }
2339 }
2340
2341}
2342
2343
2344/**
2345 * Worker for intnetR0TrunkIfSnoopAddr that takes care of what
2346 * is likely to be a DHCP message.
2347 *
2348 * The caller has already check that the UDP source and destination ports
2349 * are BOOTPS or BOOTPC.
2350 *
2351 * @param pNetwork The network this frame was seen on.
2352 * @param pSG The gather list for the frame.
2353 */
2354static void intnetR0TrunkIfSnoopDhcp(PINTNETNETWORK pNetwork, PCINTNETSG pSG)
2355{
2356 /*
2357 * Get a pointer to a linear copy of the full packet, using the
2358 * temporary buffer if necessary.
2359 */
2360 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((PCRTNETETHERHDR)pSG->aSegs[0].pv + 1);
2361 uint32_t cbPacket = pSG->cbTotal - sizeof(RTNETETHERHDR);
2362 if (pSG->cSegsUsed > 1)
2363 {
2364 cbPacket = RT_MIN(cbPacket, INTNETNETWORK_TMP_SIZE);
2365 Log6(("intnetR0TrunkIfSnoopDhcp: Copying IPv4/UDP/DHCP pkt %u\n", cbPacket));
2366 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
2367 return;
2368 //pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
2369 pIpHdr = (PCRTNETIPV4)pNetwork->pbTmp;
2370 }
2371
2372 /*
2373 * Validate the IP header and find the UDP packet.
2374 */
2375 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, pSG->cbTotal - sizeof(RTNETETHERHDR), true /*fChecksum*/))
2376 {
2377 Log(("intnetR0TrunkIfSnoopDhcp: bad ip header\n"));
2378 return;
2379 }
2380 uint32_t cbIpHdr = pIpHdr->ip_hl * 4;
2381
2382 /*
2383 * Hand it over to the common DHCP snooper.
2384 */
2385 intnetR0NetworkSnoopDhcp(pNetwork, pIpHdr, (PCRTNETUDP)((uintptr_t)pIpHdr + cbIpHdr), cbPacket - cbIpHdr);
2386}
2387
2388#endif /* INTNET_WITH_DHCP_SNOOPING */
2389
2390
2391/**
2392 * Snoops up source addresses from ARP requests and purge these from the address
2393 * caches.
2394 *
2395 * The purpose of this purging is to get rid of stale addresses.
2396 *
2397 * @param pNetwork The network this frame was seen on.
2398 * @param pSG The gather list for the frame.
2399 */
2400static void intnetR0TrunkIfSnoopArp(PINTNETNETWORK pNetwork, PCINTNETSG pSG)
2401{
2402 /*
2403 * Check the minimum size first.
2404 */
2405 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4)))
2406 return;
2407
2408 /*
2409 * Copy to temporary buffer if necessary.
2410 */
2411 uint32_t cbPacket = RT_MIN(pSG->cbTotal, sizeof(RTNETARPIPV4));
2412 PCRTNETARPIPV4 pArpIPv4 = (PCRTNETARPIPV4)((uintptr_t)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
2413 if ( pSG->cSegsUsed != 1
2414 && pSG->aSegs[0].cb < cbPacket)
2415 {
2416 if ( (pSG->fFlags & (INTNETSG_FLAGS_ARP_IPV4 | INTNETSG_FLAGS_PKT_CP_IN_TMP))
2417 != (INTNETSG_FLAGS_ARP_IPV4 | INTNETSG_FLAGS_PKT_CP_IN_TMP)
2418 && !intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
2419 return;
2420 pArpIPv4 = (PCRTNETARPIPV4)pNetwork->pbTmp;
2421 }
2422
2423 /*
2424 * Ignore packets which doesn't interest us or we perceive as malformed.
2425 */
2426 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
2427 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
2428 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
2429 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
2430 return;
2431 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
2432 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
2433 && ar_oper != RTNET_ARPOP_REPLY))
2434 {
2435 Log6(("ts-ar: op=%#x\n", ar_oper));
2436 return;
2437 }
2438
2439 /*
2440 * Delete the source address if it's OK.
2441 */
2442 if ( !intnetR0IsMacAddrMulticast(&pArpIPv4->ar_sha)
2443 && ( pArpIPv4->ar_sha.au16[0]
2444 || pArpIPv4->ar_sha.au16[1]
2445 || pArpIPv4->ar_sha.au16[2])
2446 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_spa))
2447 {
2448 Log6(("ts-ar: %d.%d.%d.%d / %.6Rhxs\n", pArpIPv4->ar_spa.au8[0], pArpIPv4->ar_spa.au8[1],
2449 pArpIPv4->ar_spa.au8[2], pArpIPv4->ar_spa.au8[3], &pArpIPv4->ar_sha));
2450 intnetR0NetworkAddrCacheDelete(pNetwork, (PCRTNETADDRU)&pArpIPv4->ar_spa,
2451 kIntNetAddrType_IPv4, sizeof(pArpIPv4->ar_spa), "tif/arp");
2452 }
2453}
2454
2455
2456#ifdef INTNET_WITH_DHCP_SNOOPING
2457/**
2458 * Snoop up addresses from ARP and DHCP traffic from frames coming
2459 * over the trunk connection.
2460 *
2461 * The caller is responsible for do some basic filtering before calling
2462 * this function.
2463 * For IPv4 this means checking against the minimum DHCPv4 frame size.
2464 *
2465 * @param pNetwork The network.
2466 * @param pSG The SG list for the frame.
2467 * @param EtherType The Ethertype of the frame.
2468 */
2469static void intnetR0TrunkIfSnoopAddr(PINTNETNETWORK pNetwork, PCINTNETSG pSG, uint16_t EtherType)
2470{
2471 switch (EtherType)
2472 {
2473 case RTNET_ETHERTYPE_IPV4:
2474 {
2475 uint32_t cbIpHdr;
2476 uint8_t b;
2477
2478 Assert(pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN);
2479 if (pSG->aSegs[0].cb >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN)
2480 {
2481 /* check if the protocol is UDP */
2482 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((uint8_t const *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
2483 if (pIpHdr->ip_p != RTNETIPV4_PROT_UDP)
2484 return;
2485
2486 /* get the TCP header length */
2487 cbIpHdr = pIpHdr->ip_hl * 4;
2488 }
2489 else
2490 {
2491 /* check if the protocol is UDP */
2492 if ( intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_p))
2493 != RTNETIPV4_PROT_UDP)
2494 return;
2495
2496 /* get the TCP header length */
2497 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + 0); /* (IPv4 first byte, a bitfield) */
2498 cbIpHdr = (b & 0x0f) * 4;
2499 }
2500 if (cbIpHdr < RTNETIPV4_MIN_LEN)
2501 return;
2502
2503 /* compare the ports. */
2504 if (pSG->aSegs[0].cb >= sizeof(RTNETETHERHDR) + cbIpHdr + RTNETUDP_MIN_LEN)
2505 {
2506 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t const *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR) + cbIpHdr);
2507 if ( ( RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPS
2508 && RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPS)
2509 || ( RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPC
2510 && RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPC))
2511 return;
2512 }
2513 else
2514 {
2515 /* get the lower byte of the UDP source port number. */
2516 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_sport) + 1);
2517 if ( b != RTNETIPV4_PORT_BOOTPS
2518 && b != RTNETIPV4_PORT_BOOTPC)
2519 return;
2520 uint8_t SrcPort = b;
2521 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_sport));
2522 if (b)
2523 return;
2524
2525 /* get the lower byte of the UDP destination port number. */
2526 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_dport) + 1);
2527 if ( b != RTNETIPV4_PORT_BOOTPS
2528 && b != RTNETIPV4_PORT_BOOTPC)
2529 return;
2530 if (b == SrcPort)
2531 return;
2532 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_dport));
2533 if (b)
2534 return;
2535 }
2536 intnetR0TrunkIfSnoopDhcp(pNetwork, pSG);
2537 break;
2538 }
2539
2540 case RTNET_ETHERTYPE_ARP:
2541 intnetR0TrunkIfSnoopArp(pNetwork, pSG);
2542 break;
2543 }
2544}
2545#endif /* INTNET_WITH_DHCP_SNOOPING */
2546
2547/**
2548 * Deals with an IPv6 packet.
2549 *
2550 * This will fish out the source IP address and add it to the cache.
2551 * Then it will look for DHCPRELEASE requests (?) and anything else
2552 * that we might find useful later.
2553 *
2554 * @param pIf The interface that's sending the frame.
2555 * @param pIpHdr Pointer to the IPv4 header in the frame.
2556 * @param cbPacket The size of the packet, or more correctly the
2557 * size of the frame without the ethernet header.
2558 * @param fGso Set if this is a GSO frame, clear if regular.
2559 */
2560static void intnetR0IfSnoopIPv6SourceAddr(PINTNETIF pIf, PCRTNETIPV6 pIpHdr, uint32_t cbPacket, bool fGso)
2561{
2562 NOREF(fGso);
2563
2564 /*
2565 * Check the header size first to prevent access invalid data.
2566 */
2567 if (cbPacket < RTNETIPV6_MIN_LEN)
2568 return;
2569
2570 /*
2571 * If the source address is good (not multicast) and
2572 * not already in the address cache of the sender, add it.
2573 */
2574 RTNETADDRU Addr;
2575 Addr.IPv6 = pIpHdr->ip6_src;
2576
2577 if ( intnetR0IPv6AddrIsGood(Addr.IPv6) && (pIpHdr->ip6_hlim == 0xff)
2578 && intnetR0IfAddrCacheLookupLikely(&pIf->aAddrCache[kIntNetAddrType_IPv6], &Addr, sizeof(Addr.IPv6)) < 0)
2579 {
2580 intnetR0IfAddrCacheAdd(pIf, kIntNetAddrType_IPv6, &Addr, "if/ipv6");
2581 }
2582}
2583
2584
2585/**
2586 * Deals with an IPv4 packet.
2587 *
2588 * This will fish out the source IP address and add it to the cache.
2589 * Then it will look for DHCPRELEASE requests (?) and anything else
2590 * that we might find useful later.
2591 *
2592 * @param pIf The interface that's sending the frame.
2593 * @param pIpHdr Pointer to the IPv4 header in the frame.
2594 * @param cbPacket The size of the packet, or more correctly the
2595 * size of the frame without the ethernet header.
2596 * @param fGso Set if this is a GSO frame, clear if regular.
2597 */
2598static void intnetR0IfSnoopIPv4SourceAddr(PINTNETIF pIf, PCRTNETIPV4 pIpHdr, uint32_t cbPacket, bool fGso)
2599{
2600 /*
2601 * Check the header size first to prevent access invalid data.
2602 */
2603 if (cbPacket < RTNETIPV4_MIN_LEN)
2604 return;
2605 uint32_t cbHdr = (uint32_t)pIpHdr->ip_hl * 4;
2606 if ( cbHdr < RTNETIPV4_MIN_LEN
2607 || cbPacket < cbHdr)
2608 return;
2609
2610 /*
2611 * If the source address is good (not broadcast or my network) and
2612 * not already in the address cache of the sender, add it. Validate
2613 * the IP header before adding it.
2614 */
2615 bool fValidatedIpHdr = false;
2616 RTNETADDRU Addr;
2617 Addr.IPv4 = pIpHdr->ip_src;
2618 if ( intnetR0IPv4AddrIsGood(Addr.IPv4)
2619 && intnetR0IfAddrCacheLookupLikely(&pIf->aAddrCache[kIntNetAddrType_IPv4], &Addr, sizeof(Addr.IPv4)) < 0)
2620 {
2621 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, cbPacket, !fGso /*fChecksum*/))
2622 {
2623 Log(("intnetR0IfSnoopIPv4SourceAddr: bad ip header\n"));
2624 return;
2625 }
2626
2627 intnetR0IfAddrCacheAddIt(pIf, kIntNetAddrType_IPv4, &Addr, "if/ipv4");
2628 fValidatedIpHdr = true;
2629 }
2630
2631#ifdef INTNET_WITH_DHCP_SNOOPING
2632 /*
2633 * Check for potential DHCP packets.
2634 */
2635 if ( pIpHdr->ip_p == RTNETIPV4_PROT_UDP /* DHCP is UDP. */
2636 && cbPacket >= cbHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN /* Min DHCP packet len. */
2637 && !fGso) /* GSO is not applicable to DHCP traffic. */
2638 {
2639 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t const *)pIpHdr + cbHdr);
2640 if ( ( RT_BE2H_U16(pUdpHdr->uh_dport) == RTNETIPV4_PORT_BOOTPS
2641 || RT_BE2H_U16(pUdpHdr->uh_sport) == RTNETIPV4_PORT_BOOTPS)
2642 && ( RT_BE2H_U16(pUdpHdr->uh_sport) == RTNETIPV4_PORT_BOOTPC
2643 || RT_BE2H_U16(pUdpHdr->uh_dport) == RTNETIPV4_PORT_BOOTPC))
2644 {
2645 if ( fValidatedIpHdr
2646 || RTNetIPv4IsHdrValid(pIpHdr, cbPacket, cbPacket, !fGso /*fChecksum*/))
2647 intnetR0NetworkSnoopDhcp(pIf->pNetwork, pIpHdr, pUdpHdr, cbPacket - cbHdr);
2648 else
2649 Log(("intnetR0IfSnoopIPv4SourceAddr: bad ip header (dhcp)\n"));
2650 }
2651 }
2652#endif /* INTNET_WITH_DHCP_SNOOPING */
2653}
2654
2655
2656/**
2657 * Snoop up source addresses from an ARP request or reply.
2658 *
2659 * @param pIf The interface that's sending the frame.
2660 * @param pHdr The ARP header.
2661 * @param cbPacket The size of the packet (might be larger than the ARP
2662 * request 'cause of min ethernet frame size).
2663 * @param pfSgFlags Pointer to the SG flags. This is used to tag the packet so we
2664 * don't have to repeat the frame parsing in intnetR0TrunkIfSend.
2665 */
2666static void intnetR0IfSnoopArpAddr(PINTNETIF pIf, PCRTNETARPIPV4 pArpIPv4, uint32_t cbPacket, uint16_t *pfSgFlags)
2667{
2668 /*
2669 * Ignore packets which doesn't interest us or we perceive as malformed.
2670 */
2671 if (RT_UNLIKELY(cbPacket < sizeof(RTNETARPIPV4)))
2672 return;
2673 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
2674 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
2675 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
2676 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
2677 return;
2678 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
2679 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
2680 && ar_oper != RTNET_ARPOP_REPLY))
2681 {
2682 Log6(("ar_oper=%#x\n", ar_oper));
2683 return;
2684 }
2685
2686 /*
2687 * Tag the SG as ARP IPv4 for later editing, then check for addresses
2688 * which can be removed or added to the address cache of the sender.
2689 */
2690 *pfSgFlags |= INTNETSG_FLAGS_ARP_IPV4;
2691
2692 if ( ar_oper == RTNET_ARPOP_REPLY
2693 && !intnetR0IsMacAddrMulticast(&pArpIPv4->ar_tha)
2694 && ( pArpIPv4->ar_tha.au16[0]
2695 || pArpIPv4->ar_tha.au16[1]
2696 || pArpIPv4->ar_tha.au16[2])
2697 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_tpa))
2698 intnetR0IfAddrCacheDelete(pIf, &pIf->aAddrCache[kIntNetAddrType_IPv4],
2699 (PCRTNETADDRU)&pArpIPv4->ar_tpa, sizeof(RTNETADDRIPV4), "if/arp");
2700
2701 if ( !memcmp(&pArpIPv4->ar_sha, &pIf->MacAddr, sizeof(RTMAC))
2702 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_spa))
2703 {
2704 intnetR0IfAddrCacheAdd(pIf, kIntNetAddrType_IPv4, (PCRTNETADDRU)&pArpIPv4->ar_spa, "if/arp");
2705 }
2706}
2707
2708
2709
2710/**
2711 * Checks packets send by a normal interface for new network
2712 * layer addresses.
2713 *
2714 * @param pIf The interface that's sending the frame.
2715 * @param pbFrame The frame.
2716 * @param cbFrame The size of the frame.
2717 * @param fGso Set if this is a GSO frame, clear if regular.
2718 * @param pfSgFlags Pointer to the SG flags. This is used to tag the packet so we
2719 * don't have to repeat the frame parsing in intnetR0TrunkIfSend.
2720 */
2721static void intnetR0IfSnoopAddr(PINTNETIF pIf, uint8_t const *pbFrame, uint32_t cbFrame, bool fGso, uint16_t *pfSgFlags)
2722{
2723 /*
2724 * Fish out the ethertype and look for stuff we can handle.
2725 */
2726 if (cbFrame <= sizeof(RTNETETHERHDR))
2727 return;
2728 cbFrame -= sizeof(RTNETETHERHDR);
2729
2730 uint16_t EtherType = RT_H2BE_U16(((PCRTNETETHERHDR)pbFrame)->EtherType);
2731 switch (EtherType)
2732 {
2733 case RTNET_ETHERTYPE_IPV4:
2734 intnetR0IfSnoopIPv4SourceAddr(pIf, (PCRTNETIPV4)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, fGso);
2735 break;
2736
2737 case RTNET_ETHERTYPE_IPV6:
2738 intnetR0IfSnoopIPv6SourceAddr(pIf, (PCRTNETIPV6)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, fGso);
2739 break;
2740
2741#if 0 /** @todo IntNet: implement IPX for wireless MAC sharing? */
2742 case RTNET_ETHERTYPE_IPX_1:
2743 case RTNET_ETHERTYPE_IPX_2:
2744 case RTNET_ETHERTYPE_IPX_3:
2745 intnetR0IfSnoopIpxSourceAddr(pIf, (PCINTNETIPX)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, pfSgFlags);
2746 break;
2747#endif
2748 case RTNET_ETHERTYPE_ARP:
2749 intnetR0IfSnoopArpAddr(pIf, (PCRTNETARPIPV4)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, pfSgFlags);
2750 break;
2751 }
2752}
2753
2754
2755/**
2756 * Writes a frame packet to the ring buffer.
2757 *
2758 * @returns VBox status code.
2759 * @param pBuf The buffer.
2760 * @param pRingBuf The ring buffer to read from.
2761 * @param pSG The gather list.
2762 * @param pNewDstMac Set the destination MAC address to the address if specified.
2763 */
2764static int intnetR0RingWriteFrame(PINTNETRINGBUF pRingBuf, PCINTNETSG pSG, PCRTMAC pNewDstMac)
2765{
2766 PINTNETHDR pHdr = NULL; /* shut up gcc*/
2767 void *pvDst = NULL; /* ditto */
2768 int rc;
2769 if (pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
2770 rc = IntNetRingAllocateFrame(pRingBuf, pSG->cbTotal, &pHdr, &pvDst);
2771 else
2772 rc = IntNetRingAllocateGsoFrame(pRingBuf, pSG->cbTotal, &pSG->GsoCtx, &pHdr, &pvDst);
2773 if (RT_SUCCESS(rc))
2774 {
2775 IntNetSgRead(pSG, pvDst);
2776 if (pNewDstMac)
2777 ((PRTNETETHERHDR)pvDst)->DstMac = *pNewDstMac;
2778
2779 IntNetRingCommitFrame(pRingBuf, pHdr);
2780 return VINF_SUCCESS;
2781 }
2782 return rc;
2783}
2784
2785
2786/**
2787 * Sends a frame to a specific interface.
2788 *
2789 * @param pIf The interface.
2790 * @param pIfSender The interface sending the frame. This is NULL if it's the trunk.
2791 * @param pSG The gather buffer which data is being sent to the interface.
2792 * @param pNewDstMac Set the destination MAC address to the address if specified.
2793 */
2794static void intnetR0IfSend(PINTNETIF pIf, PINTNETIF pIfSender, PINTNETSG pSG, PCRTMAC pNewDstMac)
2795{
2796 /*
2797 * Grab the receive/producer lock and copy over the frame.
2798 */
2799 RTSpinlockAcquire(pIf->hRecvInSpinlock);
2800 int rc = intnetR0RingWriteFrame(&pIf->pIntBuf->Recv, pSG, pNewDstMac);
2801 RTSpinlockRelease(pIf->hRecvInSpinlock);
2802 if (RT_SUCCESS(rc))
2803 {
2804 pIf->cYields = 0;
2805 RTSemEventSignal(pIf->hRecvEvent);
2806 return;
2807 }
2808
2809 Log(("intnetR0IfSend: overflow cb=%d hIf=%RX32\n", pSG->cbTotal, pIf->hIf));
2810
2811 /*
2812 * Scheduling hack, for unicore machines primarily.
2813 */
2814 if ( pIf->fActive
2815 && pIf->cYields < 4 /* just twice */
2816 && pIfSender /* but not if it's from the trunk */
2817 && RTThreadPreemptIsEnabled(NIL_RTTHREAD)
2818 )
2819 {
2820 unsigned cYields = 2;
2821 while (--cYields > 0)
2822 {
2823 RTSemEventSignal(pIf->hRecvEvent);
2824 RTThreadYield();
2825
2826 RTSpinlockAcquire(pIf->hRecvInSpinlock);
2827 rc = intnetR0RingWriteFrame(&pIf->pIntBuf->Recv, pSG, pNewDstMac);
2828 RTSpinlockRelease(pIf->hRecvInSpinlock);
2829 if (RT_SUCCESS(rc))
2830 {
2831 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatYieldsOk);
2832 RTSemEventSignal(pIf->hRecvEvent);
2833 return;
2834 }
2835 pIf->cYields++;
2836 }
2837 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatYieldsNok);
2838 }
2839
2840 /* ok, the frame is lost. */
2841 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatLost);
2842 RTSemEventSignal(pIf->hRecvEvent);
2843}
2844
2845
2846/**
2847 * Fallback path that does the GSO segmenting before passing the frame on to the
2848 * trunk interface.
2849 *
2850 * The caller holds the trunk lock.
2851 *
2852 * @param pThis The trunk.
2853 * @param pIfSender The IF sending the frame.
2854 * @param pSG Pointer to the gather list.
2855 * @param fDst The destination flags.
2856 */
2857static int intnetR0TrunkIfSendGsoFallback(PINTNETTRUNKIF pThis, PINTNETIF pIfSender, PINTNETSG pSG, uint32_t fDst)
2858{
2859 /*
2860 * Since we're only using this for GSO frame coming from the internal
2861 * network interfaces and never the trunk, we can assume there is only
2862 * one segment. This simplifies the code quite a bit.
2863 */
2864 Assert(PDMNetGsoIsValid(&pSG->GsoCtx, sizeof(pSG->GsoCtx), pSG->cbTotal));
2865 AssertReturn(pSG->cSegsUsed == 1, VERR_INTERNAL_ERROR_4);
2866
2867 union
2868 {
2869 uint8_t abBuf[sizeof(INTNETSG) + sizeof(INTNETSEG)];
2870 INTNETSG SG;
2871 } u;
2872
2873 /** @todo We have to adjust MSS so it does not exceed the value configured for
2874 * the host's interface.
2875 */
2876
2877 /*
2878 * Carve out the frame segments with the header and frame in different
2879 * scatter / gather segments.
2880 */
2881 uint32_t const cSegs = PDMNetGsoCalcSegmentCount(&pSG->GsoCtx, pSG->cbTotal);
2882 for (uint32_t iSeg = 0; iSeg < cSegs; iSeg++)
2883 {
2884 uint32_t cbSegPayload, cbSegHdrs;
2885 uint32_t offSegPayload = PDMNetGsoCarveSegment(&pSG->GsoCtx, (uint8_t *)pSG->aSegs[0].pv, pSG->cbTotal, iSeg, cSegs,
2886 pIfSender->abGsoHdrs, &cbSegHdrs, &cbSegPayload);
2887
2888 IntNetSgInitTempSegs(&u.SG, cbSegHdrs + cbSegPayload, 2, 2);
2889 u.SG.aSegs[0].Phys = NIL_RTHCPHYS;
2890 u.SG.aSegs[0].pv = pIfSender->abGsoHdrs;
2891 u.SG.aSegs[0].cb = cbSegHdrs;
2892 u.SG.aSegs[1].Phys = NIL_RTHCPHYS;
2893 u.SG.aSegs[1].pv = (uint8_t *)pSG->aSegs[0].pv + offSegPayload;
2894 u.SG.aSegs[1].cb = (uint32_t)cbSegPayload;
2895
2896 int rc = pThis->pIfPort->pfnXmit(pThis->pIfPort, pIfSender->pvIfData, &u.SG, fDst);
2897 if (RT_FAILURE(rc))
2898 return rc;
2899 }
2900 return VINF_SUCCESS;
2901}
2902
2903
2904/**
2905 * Checks if any of the given trunk destinations can handle this kind of GSO SG.
2906 *
2907 * @returns true if it can, false if it cannot.
2908 * @param pThis The trunk.
2909 * @param pSG The scatter / gather buffer.
2910 * @param fDst The destination mask.
2911 */
2912DECLINLINE(bool) intnetR0TrunkIfCanHandleGsoFrame(PINTNETTRUNKIF pThis, PINTNETSG pSG, uint32_t fDst)
2913{
2914 uint8_t u8Type = pSG->GsoCtx.u8Type;
2915 AssertReturn(u8Type < 32, false); /* paranoia */
2916 uint32_t fMask = RT_BIT_32(u8Type);
2917
2918 if (fDst == INTNETTRUNKDIR_HOST)
2919 return !!(pThis->fHostGsoCapabilites & fMask);
2920 if (fDst == INTNETTRUNKDIR_WIRE)
2921 return !!(pThis->fWireGsoCapabilites & fMask);
2922 Assert(fDst == (INTNETTRUNKDIR_WIRE | INTNETTRUNKDIR_HOST));
2923 return !!(pThis->fHostGsoCapabilites & pThis->fWireGsoCapabilites & fMask);
2924}
2925
2926
2927/**
2928 * Calculates the checksum of a full ipv6 frame.
2929 *
2930 * @returns 16-bit hecksum value.
2931 * @param pIpHdr The IPv6 header (network endian (big)).
2932 * @param bProtocol The protocol number. This can be the same as the
2933 * ip6_nxt field, but doesn't need to be.
2934 * @param cbPkt The packet size (host endian of course). This can
2935 * be the same as the ip6_plen field, but as with @a
2936 * bProtocol it won't be when extension headers are
2937 * present. For UDP this will be uh_ulen converted to
2938 * host endian.
2939 */
2940static uint16_t computeIPv6FullChecksum(PCRTNETIPV6 pIpHdr)
2941{
2942 uint16_t const *data;
2943 int len = RT_BE2H_U16(pIpHdr->ip6_plen);
2944 uint32_t sum = RTNetIPv6PseudoChecksum(pIpHdr);
2945
2946 /* add the payload */
2947 data = (uint16_t *) (pIpHdr + 1);
2948 while(len > 1)
2949 {
2950 sum += *(data);
2951 data++;
2952 len -= 2;
2953 }
2954
2955 if(len > 0)
2956 sum += *((uint8_t *) data);
2957
2958 while(sum >> 16)
2959 sum = (sum & 0xffff) + (sum >> 16);
2960
2961 return (uint16_t) ~sum;
2962}
2963
2964
2965/**
2966 * Rewrite VM MAC address with shared host MAC address inside IPv6
2967 * Neighbor Discovery datagrams.
2968 */
2969static void intnetR0TrunkSharedMacEditIPv6FromIntNet(PINTNETTRUNKIF pThis, PINTNETIF pIfSender,
2970 PRTNETETHERHDR pEthHdr, uint32_t cb)
2971{
2972 if (RT_UNLIKELY(cb < sizeof(*pEthHdr)))
2973 return;
2974
2975 /* have IPv6 header */
2976 PRTNETIPV6 pIPv6 = (PRTNETIPV6)(pEthHdr + 1);
2977 cb -= sizeof(*pEthHdr);
2978 if (RT_UNLIKELY(cb < sizeof(*pIPv6)))
2979 return;
2980
2981 if ( pIPv6->ip6_nxt != RTNETIPV6_PROT_ICMPV6
2982 || pIPv6->ip6_hlim != 0xff)
2983 return;
2984
2985 PRTNETICMPV6HDR pICMPv6 = (PRTNETICMPV6HDR)(pIPv6 + 1);
2986 cb -= sizeof(*pIPv6);
2987 if (RT_UNLIKELY(cb < sizeof(*pICMPv6)))
2988 return;
2989
2990 uint32_t hdrlen = 0;
2991 uint8_t llaopt = RTNETIPV6_ICMP_ND_SLLA_OPT;
2992
2993 uint8_t type = pICMPv6->icmp6_type;
2994 switch (type)
2995 {
2996 case RTNETIPV6_ICMP_TYPE_RS:
2997 hdrlen = 8;
2998 break;
2999
3000 case RTNETIPV6_ICMP_TYPE_RA:
3001 hdrlen = 16;
3002 break;
3003
3004 case RTNETIPV6_ICMP_TYPE_NS:
3005 hdrlen = 24;
3006 break;
3007
3008 case RTNETIPV6_ICMP_TYPE_NA:
3009 hdrlen = 24;
3010 llaopt = RTNETIPV6_ICMP_ND_TLLA_OPT;
3011 break;
3012
3013 default:
3014 return;
3015 }
3016
3017 AssertReturnVoid(hdrlen > 0);
3018 if (RT_UNLIKELY(cb < hdrlen))
3019 return;
3020
3021 if (RT_UNLIKELY(pICMPv6->icmp6_code != 0))
3022 return;
3023
3024 PRTNETNDP_LLA_OPT pLLAOpt = NULL;
3025 char *pOpt = (char *)pICMPv6 + hdrlen;
3026 cb -= hdrlen;
3027
3028 while (cb >= 8)
3029 {
3030 uint8_t opt = ((uint8_t *)pOpt)[0];
3031 uint32_t optlen = (uint32_t)((uint8_t *)pOpt)[1] * 8;
3032 if (RT_UNLIKELY(cb < optlen))
3033 return;
3034
3035 if (opt == llaopt)
3036 {
3037 if (RT_UNLIKELY(optlen != 8))
3038 return;
3039 pLLAOpt = (PRTNETNDP_LLA_OPT)pOpt;
3040 break;
3041 }
3042
3043 pOpt += optlen;
3044 cb -= optlen;
3045 }
3046
3047 if (pLLAOpt == NULL)
3048 return;
3049
3050 if (memcmp(&pLLAOpt->lla, &pIfSender->MacAddr, sizeof(RTMAC)) != 0)
3051 return;
3052
3053 /* overwrite VM's MAC with host's MAC */
3054 pLLAOpt->lla = pThis->MacAddr;
3055
3056 /* recompute the checksum */
3057 pICMPv6->icmp6_cksum = 0;
3058 pICMPv6->icmp6_cksum = computeIPv6FullChecksum(pIPv6);
3059}
3060
3061
3062/**
3063 * Sends a frame down the trunk.
3064 *
3065 * @param pThis The trunk.
3066 * @param pNetwork The network the frame is being sent to.
3067 * @param pIfSender The IF sending the frame. Used for MAC address
3068 * checks in shared MAC mode.
3069 * @param fDst The destination flags.
3070 * @param pSG Pointer to the gather list.
3071 */
3072static void intnetR0TrunkIfSend(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork, PINTNETIF pIfSender,
3073 uint32_t fDst, PINTNETSG pSG)
3074{
3075 /*
3076 * Quick sanity check.
3077 */
3078 AssertPtr(pThis);
3079 AssertPtr(pNetwork);
3080 AssertPtr(pIfSender);
3081 AssertPtr(pSG);
3082 Assert(fDst);
3083 AssertReturnVoid(pThis->pIfPort);
3084
3085 /*
3086 * Edit the frame if we're sharing the MAC address with the host on the wire.
3087 *
3088 * If the frame is headed for both the host and the wire, we'll have to send
3089 * it to the host before making any modifications, and force the OS specific
3090 * backend to copy it. We do this by marking it as TEMP (which is always the
3091 * case right now).
3092 */
3093 if ( (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
3094 && (fDst & INTNETTRUNKDIR_WIRE))
3095 {
3096 /*
3097 * Dispatch it to the host before making changes.
3098 */
3099 if (fDst & INTNETTRUNKDIR_HOST)
3100 {
3101 Assert(pSG->fFlags & INTNETSG_FLAGS_TEMP); /* make sure copy is forced */
3102 intnetR0TrunkIfSend(pThis, pNetwork, pIfSender, INTNETTRUNKDIR_HOST, pSG);
3103 fDst &= ~INTNETTRUNKDIR_HOST;
3104 }
3105
3106 /*
3107 * Edit the source address so that it it's the same as the host.
3108 */
3109 /* ASSUME frame from IntNetR0IfSend! */
3110 AssertReturnVoid(pSG->cSegsUsed == 1);
3111 AssertReturnVoid(pSG->cbTotal >= sizeof(RTNETETHERHDR));
3112 AssertReturnVoid(pIfSender);
3113 PRTNETETHERHDR pEthHdr = (PRTNETETHERHDR)pSG->aSegs[0].pv;
3114
3115 pEthHdr->SrcMac = pThis->MacAddr;
3116
3117 /*
3118 * Deal with tags from the snooping phase.
3119 */
3120 if (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4)
3121 {
3122 /*
3123 * APR IPv4: replace hardware (MAC) addresses because these end up
3124 * in ARP caches. So, if we don't the other machines will
3125 * send the packets to the MAC address of the guest
3126 * instead of the one of the host, which won't work on
3127 * wireless of course...
3128 */
3129 PRTNETARPIPV4 pArp = (PRTNETARPIPV4)(pEthHdr + 1);
3130 if (!memcmp(&pArp->ar_sha, &pIfSender->MacAddr, sizeof(RTMAC)))
3131 {
3132 Log6(("tw: ar_sha %.6Rhxs -> %.6Rhxs\n", &pArp->ar_sha, &pThis->MacAddr));
3133 pArp->ar_sha = pThis->MacAddr;
3134 }
3135 if (!memcmp(&pArp->ar_tha, &pIfSender->MacAddr, sizeof(RTMAC))) /* just in case... */
3136 {
3137 Log6(("tw: ar_tha %.6Rhxs -> %.6Rhxs\n", &pArp->ar_tha, &pThis->MacAddr));
3138 pArp->ar_tha = pThis->MacAddr;
3139 }
3140 }
3141 else if (pEthHdr->EtherType == RT_H2N_U16_C(RTNET_ETHERTYPE_IPV6))
3142 {
3143 intnetR0TrunkSharedMacEditIPv6FromIntNet(pThis, pIfSender, pEthHdr, pSG->cbTotal);
3144 }
3145 }
3146
3147 /*
3148 * Send the frame, handling the GSO fallback.
3149 *
3150 * Note! The trunk implementation will re-check that the trunk is active
3151 * before sending, so we don't have to duplicate that effort here.
3152 */
3153 STAM_REL_PROFILE_START(&pIfSender->pIntBuf->StatSend2, a);
3154 int rc;
3155 if ( pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID
3156 || intnetR0TrunkIfCanHandleGsoFrame(pThis, pSG, fDst) )
3157 rc = pThis->pIfPort->pfnXmit(pThis->pIfPort, pIfSender->pvIfData, pSG, fDst);
3158 else
3159 rc = intnetR0TrunkIfSendGsoFallback(pThis, pIfSender, pSG, fDst);
3160 STAM_REL_PROFILE_STOP(&pIfSender->pIntBuf->StatSend2, a);
3161
3162 /** @todo failure statistics? */
3163 Log2(("intnetR0TrunkIfSend: %Rrc fDst=%d\n", rc, fDst)); NOREF(rc);
3164}
3165
3166
3167/**
3168 * Detect broadcasts packaged as unicast and convert them back to broadcast.
3169 *
3170 * WiFi routers try to use ethernet unicast instead of broadcast or
3171 * multicast when possible. Look inside the packet and fix up
3172 * ethernet destination to be proper broadcast or multicast if
3173 * necessary.
3174 *
3175 * @returns true broadcast (pEthHdr & pSG are modified), false if not.
3176 * @param pNetwork The network the frame is being sent to.
3177 * @param pSG Pointer to the gather list for the frame. The
3178 * ethernet destination address is modified when
3179 * returning true.
3180 * @param pEthHdr Pointer to the ethernet header. The ethernet
3181 * destination address is modified when returning true.
3182 */
3183static bool intnetR0NetworkSharedMacDetectAndFixBroadcast(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3184{
3185 NOREF(pNetwork);
3186
3187 switch (pEthHdr->EtherType)
3188 {
3189 case RT_H2N_U16_C(RTNET_ETHERTYPE_ARP):
3190 {
3191 uint16_t ar_oper;
3192 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETARPHDR, ar_oper),
3193 sizeof(ar_oper), &ar_oper))
3194 return false;
3195
3196 if (ar_oper == RT_H2N_U16_C(RTNET_ARPOP_REQUEST))
3197 {
3198 /* change to broadcast */
3199 pEthHdr->DstMac.au16[0] = 0xffff;
3200 pEthHdr->DstMac.au16[1] = 0xffff;
3201 pEthHdr->DstMac.au16[2] = 0xffff;
3202 }
3203 else
3204 return false;
3205 break;
3206 }
3207
3208 case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV4):
3209 {
3210 RTNETADDRIPV4 ip_dst;
3211 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_dst),
3212 sizeof(ip_dst), &ip_dst))
3213 return false;
3214
3215 if (ip_dst.u == 0xffffffff) /* 255.255.255.255? */
3216 {
3217 /* change to broadcast */
3218 pEthHdr->DstMac.au16[0] = 0xffff;
3219 pEthHdr->DstMac.au16[1] = 0xffff;
3220 pEthHdr->DstMac.au16[2] = 0xffff;
3221 }
3222 else if ((ip_dst.au8[0] & 0xf0) == 0xe0) /* IPv4 multicast? */
3223 {
3224 /* change to 01:00:5e:xx:xx:xx multicast ... */
3225 pEthHdr->DstMac.au8[0] = 0x01;
3226 pEthHdr->DstMac.au8[1] = 0x00;
3227 pEthHdr->DstMac.au8[2] = 0x5e;
3228 /* ... with lower 23 bits from the multicast IP address */
3229 pEthHdr->DstMac.au8[3] = ip_dst.au8[1] & 0x7f;
3230 pEthHdr->DstMac.au8[4] = ip_dst.au8[2];
3231 pEthHdr->DstMac.au8[5] = ip_dst.au8[3];
3232 }
3233 else
3234 return false;
3235 break;
3236 }
3237
3238 case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV6):
3239 {
3240 RTNETADDRIPV6 ip6_dst;
3241 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV6, ip6_dst),
3242 sizeof(ip6_dst), &ip6_dst))
3243 return false;
3244
3245 if (ip6_dst.au8[0] == 0xff) /* IPv6 multicast? */
3246 {
3247 pEthHdr->DstMac.au16[0] = 0x3333;
3248 pEthHdr->DstMac.au16[1] = ip6_dst.au16[6];
3249 pEthHdr->DstMac.au16[2] = ip6_dst.au16[7];
3250 }
3251 else
3252 return false;
3253 break;
3254 }
3255
3256 default:
3257 return false;
3258 }
3259
3260
3261 /*
3262 * Update ethernet destination in the segment.
3263 */
3264 intnetR0SgWritePart(pSG, RT_OFFSETOF(RTNETETHERHDR, DstMac), sizeof(pEthHdr->DstMac), &pEthHdr->DstMac);
3265
3266 return true;
3267}
3268
3269
3270/**
3271 * Snoops a multicast ICMPv6 ND DAD from the wire via the trunk connection.
3272 *
3273 * @param pNetwork The network the frame is being sent to.
3274 * @param pSG Pointer to the gather list for the frame.
3275 * @param pEthHdr Pointer to the ethernet header.
3276 */
3277static void intnetR0NetworkSnoopNAFromWire(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3278{
3279 NOREF(pEthHdr);
3280
3281 /*
3282 * Check the minimum size and get a linear copy of the thing to work on,
3283 * using the temporary buffer if necessary.
3284 */
3285 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETIPV6) +
3286 sizeof(RTNETNDP)))
3287 return;
3288 PRTNETIPV6 pIPv6 = (PRTNETIPV6)((uint8_t *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
3289 if ( pSG->cSegsUsed != 1
3290 && pSG->aSegs[0].cb < sizeof(RTNETETHERHDR) + sizeof(RTNETIPV6) +
3291 sizeof(RTNETNDP))
3292 {
3293 Log6(("fw: Copying IPv6 pkt %u\n", sizeof(RTNETIPV6)));
3294 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), sizeof(RTNETIPV6)
3295 + sizeof(RTNETNDP), pNetwork->pbTmp))
3296 return;
3297 pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3298 pIPv6 = (PRTNETIPV6)pNetwork->pbTmp;
3299 }
3300
3301 PCRTNETNDP pNd = (PCRTNETNDP) (pIPv6 + 1);
3302
3303 /*
3304 * a multicast NS with :: as source address means a DAD packet.
3305 * if it comes from the wire and we have the DAD'd address in our cache,
3306 * flush the entry as the address is being acquired by someone else on
3307 * the network.
3308 */
3309 if ( pIPv6->ip6_hlim == 0xff
3310 && pIPv6->ip6_nxt == RTNETIPV6_PROT_ICMPV6
3311 && pNd->Hdr.icmp6_type == RTNETIPV6_ICMP_TYPE_NS
3312 && pNd->Hdr.icmp6_code == 0
3313 && pIPv6->ip6_src.QWords.qw0 == 0
3314 && pIPv6->ip6_src.QWords.qw1 == 0)
3315 {
3316
3317 intnetR0NetworkAddrCacheDelete(pNetwork, (PCRTNETADDRU) &pNd->target_address,
3318 kIntNetAddrType_IPv6, sizeof(RTNETADDRIPV6), "tif/ip6");
3319 }
3320}
3321/**
3322 * Edits an ARP packet arriving from the wire via the trunk connection.
3323 *
3324 * @param pNetwork The network the frame is being sent to.
3325 * @param pSG Pointer to the gather list for the frame.
3326 * The flags and data content may be updated.
3327 * @param pEthHdr Pointer to the ethernet header. This may also be
3328 * updated if it's a unicast...
3329 */
3330static void intnetR0NetworkEditArpFromWire(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3331{
3332 /*
3333 * Check the minimum size and get a linear copy of the thing to work on,
3334 * using the temporary buffer if necessary.
3335 */
3336 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4)))
3337 return;
3338 PRTNETARPIPV4 pArpIPv4 = (PRTNETARPIPV4)((uint8_t *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
3339 if ( pSG->cSegsUsed != 1
3340 && pSG->aSegs[0].cb < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4))
3341 {
3342 Log6(("fw: Copying ARP pkt %u\n", sizeof(RTNETARPIPV4)));
3343 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), sizeof(RTNETARPIPV4), pNetwork->pbTmp))
3344 return;
3345 pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3346 pArpIPv4 = (PRTNETARPIPV4)pNetwork->pbTmp;
3347 }
3348
3349 /*
3350 * Ignore packets which doesn't interest us or we perceive as malformed.
3351 */
3352 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
3353 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
3354 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
3355 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
3356 return;
3357 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
3358 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
3359 && ar_oper != RTNET_ARPOP_REPLY))
3360 {
3361 Log6(("ar_oper=%#x\n", ar_oper));
3362 return;
3363 }
3364
3365 /* Tag it as ARP IPv4. */
3366 pSG->fFlags |= INTNETSG_FLAGS_ARP_IPV4;
3367
3368 /*
3369 * The thing we're interested in here is a reply to a query made by a guest
3370 * since we modified the MAC in the initial request the guest made.
3371 */
3372 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
3373 RTMAC MacAddrTrunk;
3374 if (pNetwork->MacTab.pTrunk)
3375 MacAddrTrunk = pNetwork->MacTab.pTrunk->MacAddr;
3376 else
3377 memset(&MacAddrTrunk, 0, sizeof(MacAddrTrunk));
3378 RTSpinlockRelease(pNetwork->hAddrSpinlock);
3379 if ( ar_oper == RTNET_ARPOP_REPLY
3380 && !memcmp(&pArpIPv4->ar_tha, &MacAddrTrunk, sizeof(RTMAC)))
3381 {
3382 PINTNETIF pIf = intnetR0NetworkAddrCacheLookupIf(pNetwork, (PCRTNETADDRU)&pArpIPv4->ar_tpa,
3383 kIntNetAddrType_IPv4, sizeof(pArpIPv4->ar_tpa));
3384 if (pIf)
3385 {
3386 Log6(("fw: ar_tha %.6Rhxs -> %.6Rhxs\n", &pArpIPv4->ar_tha, &pIf->MacAddr));
3387 pArpIPv4->ar_tha = pIf->MacAddr;
3388 if (!memcmp(&pEthHdr->DstMac, &MacAddrTrunk, sizeof(RTMAC)))
3389 {
3390 Log6(("fw: DstMac %.6Rhxs -> %.6Rhxs\n", &pEthHdr->DstMac, &pIf->MacAddr));
3391 pEthHdr->DstMac = pIf->MacAddr;
3392 if ((void *)pEthHdr != pSG->aSegs[0].pv)
3393 intnetR0SgWritePart(pSG, RT_OFFSETOF(RTNETETHERHDR, DstMac), sizeof(RTMAC), &pIf->MacAddr);
3394 }
3395 intnetR0BusyDecIf(pIf);
3396
3397 /* Write back the packet if we've been making changes to a buffered copy. */
3398 if (pSG->fFlags & INTNETSG_FLAGS_PKT_CP_IN_TMP)
3399 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR), sizeof(PRTNETARPIPV4), pArpIPv4);
3400 }
3401 }
3402}
3403
3404
3405/**
3406 * Detects and edits an DHCP packet arriving from the internal net.
3407 *
3408 * @param pNetwork The network the frame is being sent to.
3409 * @param pSG Pointer to the gather list for the frame.
3410 * The flags and data content may be updated.
3411 * @param pEthHdr Pointer to the ethernet header. This may also be
3412 * updated if it's a unicast...
3413 */
3414static void intnetR0NetworkEditDhcpFromIntNet(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3415{
3416 NOREF(pEthHdr);
3417
3418 /*
3419 * Check the minimum size and get a linear copy of the thing to work on,
3420 * using the temporary buffer if necessary.
3421 */
3422 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN))
3423 return;
3424 /*
3425 * Get a pointer to a linear copy of the full packet, using the
3426 * temporary buffer if necessary.
3427 */
3428 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((PCRTNETETHERHDR)pSG->aSegs[0].pv + 1);
3429 uint32_t cbPacket = pSG->cbTotal - sizeof(RTNETETHERHDR);
3430 if (pSG->cSegsUsed > 1)
3431 {
3432 cbPacket = RT_MIN(cbPacket, INTNETNETWORK_TMP_SIZE);
3433 Log6(("intnetR0NetworkEditDhcpFromIntNet: Copying IPv4/UDP/DHCP pkt %u\n", cbPacket));
3434 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
3435 return;
3436 //pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3437 pIpHdr = (PCRTNETIPV4)pNetwork->pbTmp;
3438 }
3439
3440 /*
3441 * Validate the IP header and find the UDP packet.
3442 */
3443 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, pSG->cbTotal - sizeof(RTNETETHERHDR), true /*fCheckSum*/))
3444 {
3445 Log6(("intnetR0NetworkEditDhcpFromIntNet: bad ip header\n"));
3446 return;
3447 }
3448 size_t cbIpHdr = pIpHdr->ip_hl * 4;
3449 if ( pIpHdr->ip_p != RTNETIPV4_PROT_UDP /* DHCP is UDP. */
3450 || cbPacket < cbIpHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN) /* Min DHCP packet len */
3451 return;
3452
3453 size_t cbUdpPkt = cbPacket - cbIpHdr;
3454 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uintptr_t)pIpHdr + cbIpHdr);
3455 /* We are only interested in DHCP packets coming from client to server. */
3456 if ( RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPS
3457 || RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPC)
3458 return;
3459
3460 /*
3461 * Check if the DHCP message is valid and get the type.
3462 */
3463 if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbUdpPkt, true /*fCheckSum*/))
3464 {
3465 Log6(("intnetR0NetworkEditDhcpFromIntNet: Bad UDP packet\n"));
3466 return;
3467 }
3468 PCRTNETBOOTP pDhcp = (PCRTNETBOOTP)(pUdpHdr + 1);
3469 uint8_t bMsgType;
3470 if (!RTNetIPv4IsDHCPValid(pUdpHdr, pDhcp, cbUdpPkt - sizeof(*pUdpHdr), &bMsgType))
3471 {
3472 Log6(("intnetR0NetworkEditDhcpFromIntNet: Bad DHCP packet\n"));
3473 return;
3474 }
3475
3476 switch (bMsgType)
3477 {
3478 case RTNET_DHCP_MT_DISCOVER:
3479 case RTNET_DHCP_MT_REQUEST:
3480 /*
3481 * Must set the broadcast flag or we won't catch the respons.
3482 */
3483 if (!(pDhcp->bp_flags & RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST)))
3484 {
3485 Log6(("intnetR0NetworkEditDhcpFromIntNet: Setting broadcast flag in DHCP %#x, previously %x\n",
3486 bMsgType, pDhcp->bp_flags));
3487
3488 /* Patch flags */
3489 uint16_t uFlags = pDhcp->bp_flags | RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST);
3490 intnetR0SgWritePart(pSG, (uintptr_t)&pDhcp->bp_flags - (uintptr_t)pIpHdr + sizeof(RTNETETHERHDR), sizeof(uFlags), &uFlags);
3491
3492 /* Patch UDP checksum */
3493 if (pUdpHdr->uh_sum != 0)
3494 {
3495 uint32_t uChecksum = (uint32_t)~pUdpHdr->uh_sum + RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST);
3496 while (uChecksum >> 16)
3497 uChecksum = (uChecksum >> 16) + (uChecksum & 0xFFFF);
3498 uChecksum = ~uChecksum;
3499 intnetR0SgWritePart(pSG,
3500 (uintptr_t)&pUdpHdr->uh_sum - (uintptr_t)pIpHdr + sizeof(RTNETETHERHDR),
3501 sizeof(pUdpHdr->uh_sum),
3502 &uChecksum);
3503 }
3504 }
3505
3506#ifdef RT_OS_DARWIN
3507 /*
3508 * Work around little endian checksum issue in mac os x 10.7.0 GM.
3509 */
3510 if ( pIpHdr->ip_tos
3511 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_WORKAROUND_1))
3512 {
3513 /* Patch it. */
3514 uint8_t uTos = pIpHdr->ip_tos;
3515 uint8_t uZero = 0;
3516 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR) + 1, sizeof(uZero), &uZero);
3517
3518 /* Patch the IP header checksum. */
3519 uint32_t uChecksum = (uint32_t)~pIpHdr->ip_sum - (uTos << 8);
3520 while (uChecksum >> 16)
3521 uChecksum = (uChecksum >> 16) + (uChecksum & 0xFFFF);
3522 uChecksum = ~uChecksum;
3523
3524 Log(("intnetR0NetworkEditDhcpFromIntNet: cleared ip_tos (was %#04x); ip_sum=%#06x -> %#06x\n",
3525 uTos, RT_BE2H_U16(pIpHdr->ip_sum), RT_BE2H_U16(uChecksum) ));
3526 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_sum),
3527 sizeof(pIpHdr->ip_sum), &uChecksum);
3528 }
3529#endif
3530 break;
3531 }
3532}
3533
3534
3535/**
3536 * Checks if the callers context is okay for sending to the specified
3537 * destinations.
3538 *
3539 * @returns true if it's okay, false if it isn't.
3540 * @param pNetwork The network.
3541 * @param pIfSender The interface sending or NULL if it's the trunk.
3542 * @param pDstTab The destination table.
3543 */
3544DECLINLINE(bool) intnetR0NetworkIsContextOk(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, PCINTNETDSTTAB pDstTab)
3545{
3546 NOREF(pNetwork);
3547
3548 /* Sending to the trunk is the problematic path. If the trunk is the
3549 sender we won't be sending to it, so no problem..
3550 Note! fTrunkDst may be set event if if the trunk is the sender. */
3551 if (!pIfSender)
3552 return true;
3553
3554 uint32_t const fTrunkDst = pDstTab->fTrunkDst;
3555 if (!fTrunkDst)
3556 return true;
3557
3558 /* ASSUMES: that the trunk won't change its report while we're checking. */
3559 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3560 if (pTrunk && (fTrunkDst & pTrunk->fNoPreemptDsts) == fTrunkDst)
3561 return true;
3562
3563 /* ASSUMES: That a preemption test detects HM contexts. (Will work on
3564 non-preemptive systems as well.) */
3565 if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
3566 return true;
3567 return false;
3568}
3569
3570
3571/**
3572 * Checks if the callers context is okay for doing a broadcast given the
3573 * specified source.
3574 *
3575 * @returns true if it's okay, false if it isn't.
3576 * @param pNetwork The network.
3577 * @param fSrc The source of the packet. (0 (intnet),
3578 * INTNETTRUNKDIR_HOST or INTNETTRUNKDIR_WIRE).
3579 */
3580DECLINLINE(bool) intnetR0NetworkIsContextOkForBroadcast(PINTNETNETWORK pNetwork, uint32_t fSrc)
3581{
3582 /* Sending to the trunk is the problematic path. If the trunk is the
3583 sender we won't be sending to it, so no problem. */
3584 if (fSrc)
3585 return true;
3586
3587 /* ASSUMES: That a preemption test detects HM contexts. (Will work on
3588 non-preemptive systems as well.) */
3589 if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
3590 return true;
3591
3592 /* PARANOIA: Grab the spinlock to make sure the trunk structure cannot be
3593 freed while we're touching it. */
3594 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
3595 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
3596
3597 bool fRc = !pTrunk
3598 || pTrunk->fNoPreemptDsts == (INTNETTRUNKDIR_HOST | INTNETTRUNKDIR_WIRE)
3599 || ( (!pNetwork->MacTab.fHostActive || (pTrunk->fNoPreemptDsts & INTNETTRUNKDIR_HOST) )
3600 && (!pNetwork->MacTab.fWireActive || (pTrunk->fNoPreemptDsts & INTNETTRUNKDIR_WIRE) ) );
3601
3602 RTSpinlockRelease(pNetwork->hAddrSpinlock);
3603
3604 return fRc;
3605}
3606
3607
3608/**
3609 * Check context, edit, snoop and switch a broadcast frame when sharing MAC
3610 * address on the wire.
3611 *
3612 * The caller must hold at least one interface on the network busy to prevent it
3613 * from destructing beath us.
3614 *
3615 * @param pNetwork The network the frame is being sent to.
3616 * @param fSrc The source of the packet. (0 (intnet),
3617 * INTNETTRUNKDIR_HOST or INTNETTRUNKDIR_WIRE).
3618 * @param pIfSender The sender interface, NULL if trunk. Used to
3619 * prevent sending an echo to the sender.
3620 * @param pSG Pointer to the gather list.
3621 * @param pEthHdr Pointer to the ethernet header.
3622 * @param pDstTab The destination output table.
3623 */
3624static INTNETSWDECISION intnetR0NetworkSharedMacFixAndSwitchBroadcast(PINTNETNETWORK pNetwork,
3625 uint32_t fSrc, PINTNETIF pIfSender,
3626 PINTNETSG pSG, PRTNETETHERHDR pEthHdr,
3627 PINTNETDSTTAB pDstTab)
3628{
3629 /*
3630 * Before doing any work here, we need to figure out if we can handle it
3631 * in the current context. The restrictions are solely on the trunk.
3632 *
3633 * Note! Since at least one interface is busy, there won't be any changes
3634 * to the parameters here (unless the trunk changes its capability
3635 * report, which it shouldn't).
3636 */
3637 if (!intnetR0NetworkIsContextOkForBroadcast(pNetwork, fSrc))
3638 return INTNETSWDECISION_BAD_CONTEXT;
3639
3640 /*
3641 * Check for ICMPv6 Neighbor Advertisements coming from the trunk.
3642 * If we see an advertisement for an IP in our cache, we can safely remove
3643 * it as the IP has probably moved.
3644 */
3645 if ( (fSrc & INTNETTRUNKDIR_WIRE)
3646 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_IPV6
3647 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3648 intnetR0NetworkSnoopNAFromWire(pNetwork, pSG, pEthHdr);
3649
3650
3651 /*
3652 * Check for ARP packets from the wire since we'll have to make
3653 * modification to them if we're sharing the MAC address with the host.
3654 */
3655 if ( (fSrc & INTNETTRUNKDIR_WIRE)
3656 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_ARP
3657 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3658 intnetR0NetworkEditArpFromWire(pNetwork, pSG, pEthHdr);
3659
3660 /*
3661 * Check for DHCP packets from the internal net since we'll have to set
3662 * broadcast flag in DHCP requests if we're sharing the MAC address with
3663 * the host. GSO is not applicable to DHCP traffic.
3664 */
3665 if ( !fSrc
3666 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_IPV4
3667 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3668 intnetR0NetworkEditDhcpFromIntNet(pNetwork, pSG, pEthHdr);
3669
3670 /*
3671 * Snoop address info from packet originating from the trunk connection.
3672 */
3673 if (fSrc)
3674 {
3675#ifdef INTNET_WITH_DHCP_SNOOPING
3676 uint16_t EtherType = RT_BE2H_U16(pEthHdr->EtherType);
3677 if ( ( EtherType == RTNET_ETHERTYPE_IPV4 /* for DHCP */
3678 && pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN
3679 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID )
3680 || (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4) )
3681 intnetR0TrunkIfSnoopAddr(pNetwork, pSG, EtherType);
3682#else
3683 if (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4)
3684 intnetR0TrunkIfSnoopArp(pNetwork, pSG);
3685#endif
3686 }
3687
3688 /*
3689 * Create the broadcast destination table.
3690 */
3691 return intnetR0NetworkSwitchBroadcast(pNetwork, fSrc, pIfSender, pDstTab);
3692}
3693
3694
3695/**
3696 * Check context, snoop and switch a unicast frame using the network layer
3697 * address instead of the link layer one (when sharing MAC address on the wire).
3698 *
3699 * This function is only used for frames coming from the wire (trunk).
3700 *
3701 * @returns true if it's addressed to someone on the network, otherwise false.
3702 * @param pNetwork The network the frame is being sent to.
3703 * @param pSG Pointer to the gather list.
3704 * @param pEthHdr Pointer to the ethernet header.
3705 * @param pDstTab The destination output table.
3706 */
3707static INTNETSWDECISION intnetR0NetworkSharedMacFixAndSwitchUnicast(PINTNETNETWORK pNetwork, PINTNETSG pSG,
3708 PRTNETETHERHDR pEthHdr, PINTNETDSTTAB pDstTab)
3709{
3710 /*
3711 * Extract the network address from the packet.
3712 */
3713 RTNETADDRU Addr;
3714 INTNETADDRTYPE enmAddrType;
3715 uint8_t cbAddr;
3716 switch (RT_BE2H_U16(pEthHdr->EtherType))
3717 {
3718 case RTNET_ETHERTYPE_IPV4:
3719 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_dst), sizeof(Addr.IPv4), &Addr)))
3720 {
3721 Log(("intnetshareduni: failed to read ip_dst! cbTotal=%#x\n", pSG->cbTotal));
3722 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3723 }
3724 enmAddrType = kIntNetAddrType_IPv4;
3725 cbAddr = sizeof(Addr.IPv4);
3726 Log6(("intnetshareduni: IPv4 %d.%d.%d.%d\n", Addr.au8[0], Addr.au8[1], Addr.au8[2], Addr.au8[3]));
3727 break;
3728
3729 case RTNET_ETHERTYPE_IPV6:
3730 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV6, ip6_dst), sizeof(Addr.IPv6), &Addr)))
3731 {
3732 Log(("intnetshareduni: failed to read ip6_dst! cbTotal=%#x\n", pSG->cbTotal));
3733 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3734 }
3735 enmAddrType = kIntNetAddrType_IPv6;
3736 cbAddr = sizeof(Addr.IPv6);
3737 break;
3738#if 0 /** @todo IntNet: implement IPX for wireless MAC sharing? */
3739 case RTNET_ETHERTYPE_IPX_1:
3740 case RTNET_ETHERTYPE_IPX_2:
3741 case RTNET_ETHERTYPE_IPX_3:
3742 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPX, ipx_dstnet), sizeof(Addr.IPX), &Addr)))
3743 {
3744 Log(("intnetshareduni: failed to read ipx_dstnet! cbTotal=%#x\n", pSG->cbTotal));
3745 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3746 }
3747 enmAddrType = kIntNetAddrType_IPX;
3748 cbAddr = sizeof(Addr.IPX);
3749 break;
3750#endif
3751
3752 /*
3753 * Treat ARP as broadcast (it shouldn't end up here normally,
3754 * so it goes last in the switch).
3755 */
3756 case RTNET_ETHERTYPE_ARP:
3757 Log6(("intnetshareduni: ARP\n"));
3758 /** @todo revisit this broadcasting of unicast ARP frames! */
3759 return intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, INTNETTRUNKDIR_WIRE, NULL, pSG, pEthHdr, pDstTab);
3760
3761 /*
3762 * Unknown packets are sent to the trunk and any promiscuous interfaces.
3763 */
3764 default:
3765 {
3766 Log6(("intnetshareduni: unknown ethertype=%#x\n", RT_BE2H_U16(pEthHdr->EtherType)));
3767 return intnetR0NetworkSwitchTrunkAndPromisc(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3768 }
3769 }
3770
3771 /*
3772 * Do level-3 switching.
3773 */
3774 INTNETSWDECISION enmSwDecision = intnetR0NetworkSwitchLevel3(pNetwork, &pEthHdr->DstMac,
3775 enmAddrType, &Addr, cbAddr,
3776 INTNETTRUNKDIR_WIRE, pDstTab);
3777
3778#ifdef INTNET_WITH_DHCP_SNOOPING
3779 /*
3780 * Perform DHCP snooping. GSO is not applicable to DHCP traffic
3781 */
3782 if ( enmAddrType == kIntNetAddrType_IPv4
3783 && pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN
3784 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3785 intnetR0TrunkIfSnoopAddr(pNetwork, pSG, RT_BE2H_U16(pEthHdr->EtherType));
3786#endif /* INTNET_WITH_DHCP_SNOOPING */
3787
3788 return enmSwDecision;
3789}
3790
3791
3792/**
3793 * Release all the interfaces in the destination table when we realize that
3794 * we're in a context where we cannot get the job done.
3795 *
3796 * @param pNetwork The network.
3797 * @param pDstTab The destination table.
3798 */
3799static void intnetR0NetworkReleaseDstTab(PINTNETNETWORK pNetwork, PINTNETDSTTAB pDstTab)
3800{
3801 /* The trunk interface. */
3802 if (pDstTab->fTrunkDst)
3803 {
3804 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3805 if (pTrunk)
3806 intnetR0BusyDec(pNetwork, &pTrunk->cBusy);
3807 pDstTab->pTrunk = NULL;
3808 pDstTab->fTrunkDst = 0;
3809 }
3810
3811 /* Regular interfaces. */
3812 uint32_t iIf = pDstTab->cIfs;
3813 while (iIf-- > 0)
3814 {
3815 PINTNETIF pIf = pDstTab->aIfs[iIf].pIf;
3816 intnetR0BusyDecIf(pIf);
3817 pDstTab->aIfs[iIf].pIf = NULL;
3818 }
3819 pDstTab->cIfs = 0;
3820}
3821
3822
3823/**
3824 * Deliver the frame to the interfaces specified in the destination table.
3825 *
3826 * @param pNetwork The network.
3827 * @param pDstTab The destination table.
3828 * @param pSG The frame to send.
3829 * @param pIfSender The sender interface. NULL if it originated via
3830 * the trunk.
3831 */
3832static void intnetR0NetworkDeliver(PINTNETNETWORK pNetwork, PINTNETDSTTAB pDstTab, PINTNETSG pSG, PINTNETIF pIfSender)
3833{
3834 /*
3835 * Do the interfaces first before sending it to the wire and risk having to
3836 * modify it.
3837 */
3838 uint32_t iIf = pDstTab->cIfs;
3839 while (iIf-- > 0)
3840 {
3841 PINTNETIF pIf = pDstTab->aIfs[iIf].pIf;
3842 intnetR0IfSend(pIf, pIfSender, pSG,
3843 pDstTab->aIfs[iIf].fReplaceDstMac ? &pIf->MacAddr: NULL);
3844 intnetR0BusyDecIf(pIf);
3845 pDstTab->aIfs[iIf].pIf = NULL;
3846 }
3847 pDstTab->cIfs = 0;
3848
3849 /*
3850 * Send to the trunk.
3851 *
3852 * Note! The switching functions will include the trunk even when the frame
3853 * source is the trunk. This is because we need it to figure out
3854 * whether the other half of the trunk should see the frame or not
3855 * and let the caller know.
3856 *
3857 * So, we'll ignore trunk sends here if the frame origin is
3858 * INTNETTRUNKSWPORT::pfnRecv.
3859 */
3860 if (pDstTab->fTrunkDst)
3861 {
3862 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3863 if (pTrunk)
3864 {
3865 if (pIfSender)
3866 intnetR0TrunkIfSend(pTrunk, pNetwork, pIfSender, pDstTab->fTrunkDst, pSG);
3867 intnetR0BusyDec(pNetwork, &pTrunk->cBusy);
3868 }
3869 pDstTab->pTrunk = NULL;
3870 pDstTab->fTrunkDst = 0;
3871 }
3872}
3873
3874
/**
 * Sends a frame.
 *
 * This function will distribute the frame to the interfaces it is addressed to.
 * It will also update the MAC address of the sender.
 *
 * The caller must own the network mutex.
 *
 * @returns The switching decision.  INTNETSWDECISION_INVALID if the frame is
 *          too short to hold an ethernet header or the header cannot be read,
 *          INTNETSWDECISION_BAD_CONTEXT if the frame could not be delivered
 *          in the current context.
 * @param   pNetwork        The network the frame is being sent to.
 * @param   pIfSender       The interface sending the frame.  This is NULL if it's the trunk.
 * @param   fSrc            The source flags.  This is 0 if it's not from the trunk.
 * @param   pSG             Pointer to the gather list.
 * @param   pDstTab         The destination table to use.
 */
static INTNETSWDECISION intnetR0NetworkSend(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, uint32_t fSrc,
                                            PINTNETSG pSG, PINTNETDSTTAB pDstTab)
{
    /*
     * Assert reality.
     */
    AssertPtr(pNetwork);
    AssertPtrNull(pIfSender);
    Assert(pIfSender ? fSrc == 0 : fSrc != 0);
    Assert(!pIfSender || pNetwork == pIfSender->pNetwork);
    AssertPtr(pSG);
    Assert(pSG->cSegsUsed >= 1);
    Assert(pSG->cSegsUsed <= pSG->cSegsAlloc);
    if (pSG->cbTotal < sizeof(RTNETETHERHDR))
        return INTNETSWDECISION_INVALID;

    /*
     * Get the ethernet header (might theoretically involve multiple segments).
     * The header is copied to the stack; the switching below works on the copy.
     */
    RTNETETHERHDR EthHdr;
    if (pSG->aSegs[0].cb >= sizeof(EthHdr))
        EthHdr = *(PCRTNETETHERHDR)pSG->aSegs[0].pv;
    else if (!intnetR0SgReadPart(pSG, 0, sizeof(EthHdr), &EthHdr))
        return INTNETSWDECISION_INVALID;
    /* Logging only: restricted to frames where either MAC address starts with
       08:00:27 or 00:16:cb, or has 0xff as its first byte. */
    if (   (EthHdr.DstMac.au8[0] == 0x08 && EthHdr.DstMac.au8[1] == 0x00 && EthHdr.DstMac.au8[2] == 0x27)
        || (EthHdr.SrcMac.au8[0] == 0x08 && EthHdr.SrcMac.au8[1] == 0x00 && EthHdr.SrcMac.au8[2] == 0x27)
        || (EthHdr.DstMac.au8[0] == 0x00 && EthHdr.DstMac.au8[1] == 0x16 && EthHdr.DstMac.au8[2] == 0xcb)
        || (EthHdr.SrcMac.au8[0] == 0x00 && EthHdr.SrcMac.au8[1] == 0x16 && EthHdr.SrcMac.au8[2] == 0xcb)
        || EthHdr.DstMac.au8[0] == 0xff
        || EthHdr.SrcMac.au8[0] == 0xff)
        Log2(("D=%.6Rhxs S=%.6Rhxs T=%04x f=%x z=%x\n",
              &EthHdr.DstMac, &EthHdr.SrcMac, RT_BE2H_U16(EthHdr.EtherType), fSrc, pSG->cbTotal));

    /*
     * Learn the MAC address of the sender.  No re-learning as the interface
     * user will normally tell us the right MAC address.
     *
     * Note! We don't notify the trunk about these mainly because of the
     *       problematic contexts we might be called in.
     */
    if (RT_UNLIKELY(    pIfSender
                    &&  !pIfSender->fMacSet
                    &&  memcmp(&EthHdr.SrcMac, &pIfSender->MacAddr, sizeof(pIfSender->MacAddr))
                    &&  !intnetR0IsMacAddrMulticast(&EthHdr.SrcMac)
                    ))
    {
        Log2(("IF MAC: %.6Rhxs -> %.6Rhxs\n", &pIfSender->MacAddr, &EthHdr.SrcMac));
        RTSpinlockAcquire(pNetwork->hAddrSpinlock);

        /* Keep the MAC table entry (if any) and the interface copy in sync. */
        PINTNETMACTABENTRY pIfEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIfSender);
        if (pIfEntry)
            pIfEntry->MacAddr = EthHdr.SrcMac;
        pIfSender->MacAddr    = EthHdr.SrcMac;

        RTSpinlockRelease(pNetwork->hAddrSpinlock);
    }

    /*
     * Deal with MAC address sharing as that may require editing of the
     * packets before we dispatch them anywhere.
     */
    INTNETSWDECISION enmSwDecision;
    if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
    {
        if (intnetR0IsMacAddrMulticast(&EthHdr.DstMac))
            enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, fSrc, pIfSender, pSG, &EthHdr, pDstTab);
        else if (fSrc & INTNETTRUNKDIR_WIRE)
        {
            /* Unicast frame from the wire: it is either switched as a
               broadcast or by its network layer address. */
            if (intnetR0NetworkSharedMacDetectAndFixBroadcast(pNetwork, pSG, &EthHdr))
                enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, fSrc, pIfSender, pSG, &EthHdr, pDstTab);
            else
                enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchUnicast(pNetwork, pSG, &EthHdr, pDstTab);
        }
        else
            enmSwDecision = intnetR0NetworkSwitchUnicast(pNetwork, fSrc, pIfSender, &EthHdr.DstMac, pDstTab);
    }
    else if (intnetR0IsMacAddrMulticast(&EthHdr.DstMac))
        enmSwDecision = intnetR0NetworkSwitchBroadcast(pNetwork, fSrc, pIfSender, pDstTab);
    else
        enmSwDecision = intnetR0NetworkSwitchUnicast(pNetwork, fSrc, pIfSender, &EthHdr.DstMac, pDstTab);

    /*
     * Deliver to the destinations if we can.  When the context check fails
     * the destination table is released and the decision is changed to
     * INTNETSWDECISION_BAD_CONTEXT.
     */
    if (enmSwDecision != INTNETSWDECISION_BAD_CONTEXT)
    {
        if (intnetR0NetworkIsContextOk(pNetwork, pIfSender, pDstTab))
            intnetR0NetworkDeliver(pNetwork, pDstTab, pSG, pIfSender);
        else
        {
            intnetR0NetworkReleaseDstTab(pNetwork, pDstTab);
            enmSwDecision = INTNETSWDECISION_BAD_CONTEXT;
        }
    }

    return enmSwDecision;
}
3987
3988
/**
 * Sends one or more frames.
 *
 * The function processes the frames that have been queued in the send ring
 * buffer of the interface, one after the other, until the ring is empty or a
 * frame cannot be delivered in the current context.
 *
 * The caller is responsible for making sure that there are no concurrent calls
 * to this method (with the same handle).
 *
 * @returns VBox status code.
 * @retval  VERR_INVALID_HANDLE if hIf cannot be translated for pSession.
 * @retval  VERR_TRY_AGAIN if a frame got the INTNETSWDECISION_BAD_CONTEXT
 *          decision.  That frame is not skipped and remains in the send ring.
 * @retval  VERR_INTERNAL_ERROR_3 if the interface has no network.
 * @retval  VERR_INTERNAL_ERROR_4 if the interface has no destination table.
 * @param   hIf             The interface handle.
 * @param   pSession        The caller's session.
 */
INTNETR0DECL(int) IntNetR0IfSend(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession)
{
    Log5(("IntNetR0IfSend: hIf=%RX32\n", hIf));

    /*
     * Validate input and translate the handle.
     */
    PINTNET pIntNet = g_pIntNet;
    AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
    AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);

    PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
    if (!pIf)
        return VERR_INVALID_HANDLE;
    STAM_REL_PROFILE_START(&pIf->pIntBuf->StatSend1, a);

    /*
     * Make sure we've got a network.
     */
    int             rc       = VINF_SUCCESS;
    intnetR0BusyIncIf(pIf);
    PINTNETNETWORK  pNetwork = pIf->pNetwork;
    if (RT_LIKELY(pNetwork))
    {
        /*
         * Grab the destination table.  It is taken out of the interface
         * structure (replaced by NULL) while in use and put back below.
         */
        PINTNETDSTTAB pDstTab = ASMAtomicXchgPtrT(&pIf->pDstTab, NULL, PINTNETDSTTAB);
        if (RT_LIKELY(pDstTab))
        {
            /*
             * Process the send buffer.
             */
            INTNETSWDECISION    enmSwDecision = INTNETSWDECISION_BROADCAST;
            INTNETSG            Sg; /** @todo this will have to be changed if we're going to use async sending
                                     * with buffer sharing for some OS or service. Darwin copies everything so
                                     * I won't bother allocating and managing SGs right now. Sorry. */
            PINTNETHDR          pHdr;
            while ((pHdr = IntNetRingGetNextFrameToRead(&pIf->pIntBuf->Send)) != NULL)
            {
                uint8_t const u8Type = pHdr->u8Type;
                if (u8Type == INTNETHDR_TYPE_FRAME)
                {
                    /* Send regular frame. */
                    void *pvCurFrame = IntNetHdrGetFramePtr(pHdr, pIf->pIntBuf);
                    IntNetSgInitTemp(&Sg, pvCurFrame, pHdr->cbFrame);
                    if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
                        intnetR0IfSnoopAddr(pIf, (uint8_t *)pvCurFrame, pHdr->cbFrame, false /*fGso*/, (uint16_t *)&Sg.fFlags);
                    enmSwDecision = intnetR0NetworkSend(pNetwork, pIf, 0 /*fSrc*/, &Sg, pDstTab);
                }
                else if (u8Type == INTNETHDR_TYPE_GSO)
                {
                    /* Send GSO frame if sane.  The GSO context precedes the
                       frame data, so its size is deducted from cbFrame. */
                    PPDMNETWORKGSO  pGso    = IntNetHdrGetGsoContext(pHdr, pIf->pIntBuf);
                    uint32_t        cbFrame = pHdr->cbFrame - sizeof(*pGso);
                    if (RT_LIKELY(PDMNetGsoIsValid(pGso, pHdr->cbFrame, cbFrame)))
                    {
                        void *pvCurFrame = pGso + 1;
                        IntNetSgInitTempGso(&Sg, pvCurFrame, cbFrame, pGso);
                        if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
                            intnetR0IfSnoopAddr(pIf, (uint8_t *)pvCurFrame, cbFrame, true /*fGso*/, (uint16_t *)&Sg.fFlags);
                        enmSwDecision = intnetR0NetworkSend(pNetwork, pIf, 0 /*fSrc*/, &Sg, pDstTab);
                    }
                    else
                    {
                        STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatBadFrames); /* ignore */
                        enmSwDecision = INTNETSWDECISION_DROP;
                    }
                }
                /* Unless it's a padding frame, we're getting babble from the producer. */
                else
                {
                    if (u8Type != INTNETHDR_TYPE_PADDING)
                        STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatBadFrames); /* ignore */
                    enmSwDecision = INTNETSWDECISION_DROP;
                }
                /* Stop without skipping the frame, so it is still first in the
                   ring on the next call. */
                if (enmSwDecision == INTNETSWDECISION_BAD_CONTEXT)
                {
                    rc = VERR_TRY_AGAIN;
                    break;
                }

                /* Skip to the next frame. */
                IntNetRingSkipFrame(&pIf->pIntBuf->Send);
            }

            /*
             * Put back the destination table.
             */
            Assert(!pIf->pDstTab);
            ASMAtomicWritePtr(&pIf->pDstTab, pDstTab);
        }
        else
            rc = VERR_INTERNAL_ERROR_4;
    }
    else
        rc = VERR_INTERNAL_ERROR_3;

    /*
     * Release the interface.
     */
    intnetR0BusyDecIf(pIf);
    STAM_REL_PROFILE_STOP(&pIf->pIntBuf->StatSend1, a);
    intnetR0IfRelease(pIf, pSession);
    return rc;
}
4110
4111
4112/**
4113 * VMMR0 request wrapper for IntNetR0IfSend.
4114 *
4115 * @returns see IntNetR0IfSend.
4116 * @param pSession The caller's session.
4117 * @param pReq The request packet.
4118 */
4119INTNETR0DECL(int) IntNetR0IfSendReq(PSUPDRVSESSION pSession, PINTNETIFSENDREQ pReq)
4120{
4121 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4122 return VERR_INVALID_PARAMETER;
4123 return IntNetR0IfSend(pReq->hIf, pSession);
4124}
4125
4126
/**
 * Gets the ring-3 and/or ring-0 address of the default buffer of an interface.
 *
 * No mapping is done here; the function only reports the addresses already
 * stored in the interface structure.
 *
 * @returns VBox status code.
 * @retval  VERR_INVALID_HANDLE if hIf cannot be translated for pSession.
 * @param   hIf             The interface handle.
 * @param   pSession        The caller's session.
 * @param   ppRing3Buf      Where to store the address of the ring-3 mapping
 *                          (optional).  Zeroed before the handle is looked up.
 * @param   ppRing0Buf      Where to store the address of the ring-0 mapping
 *                          (optional).  Zeroed before the handle is looked up.
 */
INTNETR0DECL(int) IntNetR0IfGetBufferPtrs(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession,
                                          R3PTRTYPE(PINTNETBUF) *ppRing3Buf, R0PTRTYPE(PINTNETBUF) *ppRing0Buf)
{
    LogFlow(("IntNetR0IfGetBufferPtrs: hIf=%RX32 ppRing3Buf=%p ppRing0Buf=%p\n", hIf, ppRing3Buf, ppRing0Buf));

    /*
     * Validate input.
     */
    PINTNET pIntNet = g_pIntNet;
    AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
    AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);

    AssertPtrNullReturn(ppRing3Buf, VERR_INVALID_PARAMETER);
    AssertPtrNullReturn(ppRing0Buf, VERR_INVALID_PARAMETER);
    if (ppRing3Buf)
        *ppRing3Buf = 0;
    if (ppRing0Buf)
        *ppRing0Buf = 0;

    PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
    if (!pIf)
        return VERR_INVALID_HANDLE;

    /*
     * ASSUMES that only the process that created an interface can use it.
     * ASSUMES that we created the ring-3 mapping when selecting or
     *         allocating the buffer.
     */
    int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
    if (RT_SUCCESS(rc))
    {
        if (ppRing3Buf)
            *ppRing3Buf = pIf->pIntBufR3;
        if (ppRing0Buf)
            *ppRing0Buf = (R0PTRTYPE(PINTNETBUF))pIf->pIntBuf; /* tstIntNetR0 mess */

        /* The status of the mutex release becomes the return code. */
        rc = RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
    }

    intnetR0IfRelease(pIf, pSession);
    LogFlow(("IntNetR0IfGetBufferPtrs: returns %Rrc *ppRing3Buf=%p *ppRing0Buf=%p\n",
             rc, ppRing3Buf ? *ppRing3Buf : NIL_RTR3PTR, ppRing0Buf ? *ppRing0Buf : NIL_RTR0PTR));
    return rc;
}
4182
4183
4184/**
4185 * VMMR0 request wrapper for IntNetR0IfGetBufferPtrs.
4186 *
4187 * @returns see IntNetR0IfGetRing3Buffer.
4188 * @param pSession The caller's session.
4189 * @param pReq The request packet.
4190 */
4191INTNETR0DECL(int) IntNetR0IfGetBufferPtrsReq(PSUPDRVSESSION pSession, PINTNETIFGETBUFFERPTRSREQ pReq)
4192{
4193 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4194 return VERR_INVALID_PARAMETER;
4195 return IntNetR0IfGetBufferPtrs(pReq->hIf, pSession, &pReq->pRing3Buf, &pReq->pRing0Buf);
4196}
4197
4198
#if 0
/**
 * Gets the physical addresses of the default interface buffer.
 *
 * This is disabled and unfinished code: nothing is stored in paPages and the
 * function always ends up returning VERR_NOT_IMPLEMENTED after taking and
 * releasing the network mutex.
 *
 * NOTE(review): the body uses pSession, which is not among the parameters, so
 * the function would not compile if the block were enabled as is.
 *
 * @returns VBox status code.
 * @param   hIf             The interface handle.
 * @param   paPages         Where to store the addresses. (The reserved fields will be set to zero.)
 * @param   cPages          The number of entries in the paPages array.
 */
INTNETR0DECL(int) IntNetR0IfGetPhysBuffer(INTNETIFHANDLE hIf, PSUPPAGE paPages, unsigned cPages)
{
    /*
     * Validate input.
     */
    PINTNET pIntNet = g_pIntNet;
    AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
    AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);

    AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
    /* Check the last byte of the array as well. */
    AssertPtrReturn((uint8_t *)&paPages[cPages] - 1, VERR_INVALID_PARAMETER);
    PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
    if (!pIf)
        return VERR_INVALID_HANDLE;

    /*
     * Grab the lock and get the data.
     * ASSUMES that the handle isn't closed while we're here.
     */
    int rc = RTSemFastMutexRequest(pIf->pNetwork->FastMutex);
    if (RT_SUCCESS(rc))
    {
        /** @todo make a SUPR0 api for obtaining the array. SUPR0/IPRT is keeping track of everything, there
         * is no need for any extra bookkeeping here.. */

        rc = RTSemFastMutexRelease(pIf->pNetwork->FastMutex);
    }
    intnetR0IfRelease(pIf, pSession);
    return VERR_NOT_IMPLEMENTED;
}
#endif
4239
4240
/**
 * Sets the promiscuous mode property of an interface.
 *
 * The requested mode is always recorded on the interface (fPromiscuousReal).
 * The effective mode stored in the MAC table entry is only enabled when both
 * the interface (INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW) and the network
 * (INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS) permit it.
 *
 * @returns VBox status code.
 * @retval  VERR_INVALID_HANDLE if hIf cannot be translated for pSession.
 * @retval  VERR_WRONG_ORDER if the interface has no network.
 * @param   hIf             The interface handle.
 * @param   pSession        The caller's session.
 * @param   fPromiscuous    Set if the interface should be in promiscuous mode, clear if not.
 */
INTNETR0DECL(int) IntNetR0IfSetPromiscuousMode(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fPromiscuous)
{
    LogFlow(("IntNetR0IfSetPromiscuousMode: hIf=%RX32 fPromiscuous=%d\n", hIf, fPromiscuous));

    /*
     * Validate & translate input.
     */
    PINTNET pIntNet = g_pIntNet;
    AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
    AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);

    PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
    if (!pIf)
    {
        Log(("IntNetR0IfSetPromiscuousMode: returns VERR_INVALID_HANDLE\n"));
        return VERR_INVALID_HANDLE;
    }

    /*
     * Get the network, take the address spinlock, and make the change.
     * Paranoia^2: Mark ourselves busy to prevent anything from being destroyed.
     */
    int rc = VINF_SUCCESS;
    intnetR0BusyIncIf(pIf);
    PINTNETNETWORK pNetwork = pIf->pNetwork;
    if (pNetwork)
    {
        RTSpinlockAcquire(pNetwork->hAddrSpinlock);

        if (pIf->fPromiscuousReal != fPromiscuous)
        {
            const bool fPromiscuousEff = fPromiscuous
                                      && (pIf->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW)
                                      && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS);
            Log(("IntNetR0IfSetPromiscuousMode: hIf=%RX32: Changed from %d -> %d (%d)\n",
                 hIf, !fPromiscuous, !!fPromiscuous, fPromiscuousEff));

            pIf->fPromiscuousReal = fPromiscuous;

            PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
            if (RT_LIKELY(pEntry))
            {
                /* Take the old state of the entry out of the promiscuous counters... */
                if (pEntry->fPromiscuousEff)
                {
                    pNetwork->MacTab.cPromiscuousEntries--;
                    if (!pEntry->fPromiscuousSeeTrunk)
                        pNetwork->MacTab.cPromiscuousNoTrunkEntries--;
                    Assert(pNetwork->MacTab.cPromiscuousEntries < pNetwork->MacTab.cEntries);
                    Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries < pNetwork->MacTab.cEntries);
                }

                /* ...update the entry... */
                pEntry->fPromiscuousEff      = fPromiscuousEff;
                pEntry->fPromiscuousSeeTrunk = fPromiscuousEff
                                            && (pIf->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK);

                /* ...and account for the new state. */
                if (pEntry->fPromiscuousEff)
                {
                    pNetwork->MacTab.cPromiscuousEntries++;
                    if (!pEntry->fPromiscuousSeeTrunk)
                        pNetwork->MacTab.cPromiscuousNoTrunkEntries++;
                }
                Assert(pNetwork->MacTab.cPromiscuousEntries <= pNetwork->MacTab.cEntries);
                Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries <= pNetwork->MacTab.cEntries);
            }
        }

        RTSpinlockRelease(pNetwork->hAddrSpinlock);
    }
    else
        rc = VERR_WRONG_ORDER;

    intnetR0BusyDecIf(pIf);
    intnetR0IfRelease(pIf, pSession);
    return rc;
}
4324
4325
4326/**
4327 * VMMR0 request wrapper for IntNetR0IfSetPromiscuousMode.
4328 *
4329 * @returns see IntNetR0IfSetPromiscuousMode.
4330 * @param pSession The caller's session.
4331 * @param pReq The request packet.
4332 */
4333INTNETR0DECL(int) IntNetR0IfSetPromiscuousModeReq(PSUPDRVSESSION pSession, PINTNETIFSETPROMISCUOUSMODEREQ pReq)
4334{
4335 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4336 return VERR_INVALID_PARAMETER;
4337 return IntNetR0IfSetPromiscuousMode(pReq->hIf, pSession, pReq->fPromiscuous);
4338}
4339
4340
/**
 * Sets the MAC address of an interface.
 *
 * If the address differs from the current one, both the interface and its
 * MAC table entry are updated, the address is marked as explicitly set
 * (fMacSet), and the trunk (if any) is notified via pfnNotifyMacAddress.
 *
 * @returns VBox status code.
 * @retval  VERR_INVALID_HANDLE if hIf cannot be translated for pSession.
 * @retval  VERR_WRONG_ORDER if the interface has no network.
 * @param   hIf             The interface handle.
 * @param   pSession        The caller's session.
 * @param   pMac            The new MAC address.
 */
INTNETR0DECL(int) IntNetR0IfSetMacAddress(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, PCRTMAC pMac)
{
    LogFlow(("IntNetR0IfSetMacAddress: hIf=%RX32 pMac=%p:{%.6Rhxs}\n", hIf, pMac, pMac));

    /*
     * Validate & translate input.
     */
    PINTNET pIntNet = g_pIntNet;
    AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
    AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);

    AssertPtrReturn(pMac, VERR_INVALID_PARAMETER);
    PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
    if (!pIf)
    {
        Log(("IntNetR0IfSetMacAddress: returns VERR_INVALID_HANDLE\n"));
        return VERR_INVALID_HANDLE;
    }

    /*
     * Get the network, take the address spinlock, and make the change.
     * Paranoia^2: Mark ourselves busy to prevent anything from being destroyed.
     */
    int rc = VINF_SUCCESS;
    intnetR0BusyIncIf(pIf);
    PINTNETNETWORK pNetwork = pIf->pNetwork;
    if (pNetwork)
    {
        /* Stays NULL unless the address changes and the network has a trunk. */
        PINTNETTRUNKIF pTrunk = NULL;

        RTSpinlockAcquire(pNetwork->hAddrSpinlock);

        if (memcmp(&pIf->MacAddr, pMac, sizeof(pIf->MacAddr)))
        {
            Log(("IntNetR0IfSetMacAddress: hIf=%RX32: Changed from %.6Rhxs -> %.6Rhxs\n",
                 hIf, &pIf->MacAddr, pMac));

            /* Update the two copies. */
            PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
            if (RT_LIKELY(pEntry))
                pEntry->MacAddr = *pMac;
            pIf->MacAddr = *pMac;
            pIf->fMacSet = true;

            /* Grab a busy reference to the trunk so we can release the lock before notifying it. */
            pTrunk = pNetwork->MacTab.pTrunk;
            if (pTrunk)
                intnetR0BusyIncTrunk(pTrunk);
        }

        RTSpinlockRelease(pNetwork->hAddrSpinlock);

        /* Notify the trunk outside the spinlock, then drop the busy reference. */
        if (pTrunk)
        {
            Log(("IntNetR0IfSetMacAddress: pfnNotifyMacAddress hIf=%RX32\n", hIf));
            PINTNETTRUNKIFPORT pIfPort = pTrunk->pIfPort;
            if (pIfPort)
                pIfPort->pfnNotifyMacAddress(pIfPort, pIf->pvIfData, pMac);
            intnetR0BusyDecTrunk(pTrunk);
        }
    }
    else
        rc = VERR_WRONG_ORDER;

    intnetR0BusyDecIf(pIf);
    intnetR0IfRelease(pIf, pSession);
    return rc;
}
4417
4418
4419/**
4420 * VMMR0 request wrapper for IntNetR0IfSetMacAddress.
4421 *
4422 * @returns see IntNetR0IfSetMacAddress.
4423 * @param pSession The caller's session.
4424 * @param pReq The request packet.
4425 */
4426INTNETR0DECL(int) IntNetR0IfSetMacAddressReq(PSUPDRVSESSION pSession, PINTNETIFSETMACADDRESSREQ pReq)
4427{
4428 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4429 return VERR_INVALID_PARAMETER;
4430 return IntNetR0IfSetMacAddress(pReq->hIf, pSession, &pReq->Mac);
4431}
4432
4433
/**
 * Worker for intnetR0IfSetActive and intnetR0IfDestruct.
 *
 * This function will update the active interface count on the network and
 * activate or deactivate the trunk connection if necessary.
 *
 * The caller must own the giant lock (we cannot take it here).
 *
 * @returns VBox status code (always VINF_SUCCESS).
 * @param   pNetwork        The network.
 * @param   pIf             The interface.
 * @param   fActive         What to do.
 */
static int intnetR0NetworkSetIfActive(PINTNETNETWORK pNetwork, PINTNETIF pIf, bool fActive)
{
    /* quick sanity check */
    AssertPtr(pNetwork);
    AssertPtr(pIf);

    /*
     * The address spinlock of the network protects the variables, while the
     * big lock (owned by the caller) protects the calling of pfnSetState.
     * Only the spinlock is taken here.
     */
    PINTNETTRUNKIF pTrunk = NULL;
    RTSpinlockAcquire(pNetwork->hAddrSpinlock);

    /*
     * Do the update.
     */
    if (pIf->fActive != fActive)
    {
        PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
        if (RT_LIKELY(pEntry))
        {
            pEntry->fActive = fActive;
            pIf->fActive    = fActive;

            if (fActive)
            {
                /* First active interface: enable the trunk directions the network flags allow. */
                pNetwork->cActiveIFs++;
                if (pNetwork->cActiveIFs == 1)
                {
                    pTrunk = pNetwork->MacTab.pTrunk;
                    if (pTrunk)
                    {
                        pNetwork->MacTab.fHostActive = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
                        pNetwork->MacTab.fWireActive = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED);
                    }
                }
            }
            else
            {
                /* Last active interface gone: disable both trunk directions. */
                pNetwork->cActiveIFs--;
                if (pNetwork->cActiveIFs == 0)
                {
                    pTrunk = pNetwork->MacTab.pTrunk;
                    pNetwork->MacTab.fHostActive = false;
                    pNetwork->MacTab.fWireActive = false;
                }
            }
        }
    }

    RTSpinlockRelease(pNetwork->hAddrSpinlock);

    /*
     * Tell the trunk if necessary.  pTrunk is only set above when the active
     * interface count went from 0 to 1 or from 1 to 0.
     * The wait for !busy is for the Solaris streams trunk driver (mostly).
     */
    if (pTrunk && pTrunk->pIfPort)
    {
        if (!fActive)
            intnetR0BusyWait(pNetwork, &pTrunk->cBusy);

        pTrunk->pIfPort->pfnSetState(pTrunk->pIfPort, fActive ? INTNETTRUNKIFSTATE_ACTIVE : INTNETTRUNKIFSTATE_INACTIVE);
    }

    return VINF_SUCCESS;
}
4514
4515
/**
 * Sets the active property of an interface.
 *
 * @returns VBox status code.
 * @retval  VERR_INVALID_HANDLE if hIf cannot be translated for pSession.
 * @retval  VERR_WRONG_ORDER if the interface has no network.
 * @param   hIf             The interface handle.
 * @param   pSession        The caller's session.
 * @param   fActive         The new state.
 */
INTNETR0DECL(int) IntNetR0IfSetActive(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fActive)
{
    LogFlow(("IntNetR0IfSetActive: hIf=%RX32 fActive=%RTbool\n", hIf, fActive));

    /*
     * Validate & translate input.
     */
    PINTNET pIntNet = g_pIntNet;
    AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
    AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);

    PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
    if (!pIf)
    {
        Log(("IntNetR0IfSetActive: returns VERR_INVALID_HANDLE\n"));
        return VERR_INVALID_HANDLE;
    }

    /*
     * Hand it to the network since it might involve the trunk and things are
     * tricky there wrt to locking order.
     *
     * 1. We take the giant lock here.  This makes sure nobody is re-enabling
     *    the network while we're pausing it and vice versa.  This also enables
     *    us to wait for the network to become idle before telling the trunk.
     *    (Important on Solaris.)
     *
     * 2. For paranoid reasons, we grab a busy reference to the calling
     *    interface.  This is totally unnecessary but shouldn't hurt (when done
     *    after grabbing the giant lock).
     */
    int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
    if (RT_SUCCESS(rc))
    {
        intnetR0BusyIncIf(pIf);

        PINTNETNETWORK pNetwork = pIf->pNetwork;
        if (pNetwork)
            rc = intnetR0NetworkSetIfActive(pNetwork, pIf, fActive);
        else
            rc = VERR_WRONG_ORDER;

        intnetR0BusyDecIf(pIf);
        RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
    }

    intnetR0IfRelease(pIf, pSession);
    LogFlow(("IntNetR0IfSetActive: returns %Rrc\n", rc));
    return rc;
}
4574
4575
4576/**
4577 * VMMR0 request wrapper for IntNetR0IfSetActive.
4578 *
4579 * @returns see IntNetR0IfSetActive.
4580 * @param pIntNet The internal networking instance.
4581 * @param pSession The caller's session.
4582 * @param pReq The request packet.
4583 */
4584INTNETR0DECL(int) IntNetR0IfSetActiveReq(PSUPDRVSESSION pSession, PINTNETIFSETACTIVEREQ pReq)
4585{
4586 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4587 return VERR_INVALID_PARAMETER;
4588 return IntNetR0IfSetActive(pReq->hIf, pSession, pReq->fActive);
4589}
4590
4591
/**
 * Wait for the interface to get signaled.
 * The interface will be signaled when something is put into the receive buffer.
 *
 * @returns VBox status code.
 * @retval  VERR_INVALID_HANDLE if hIf cannot be translated for pSession.
 * @retval  VERR_SEM_DESTROYED if the interface is marked as being destroyed
 *          (before or after the wait), if its receive event semaphore is
 *          gone or was replaced during the wait, or if releasing the
 *          interface after the wait reports true.
 * @param   hIf             The interface handle.
 * @param   pSession        The caller's session.
 * @param   cMillies        Number of milliseconds to wait. RT_INDEFINITE_WAIT should be
 *                          used if indefinite wait is desired.
 */
INTNETR0DECL(int) IntNetR0IfWait(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, uint32_t cMillies)
{
    Log4(("IntNetR0IfWait: hIf=%RX32 cMillies=%u\n", hIf, cMillies));

    /*
     * Get and validate essential handles.
     */
    PINTNET pIntNet = g_pIntNet;
    AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
    AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);

    PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
    if (!pIf)
    {
        Log(("IntNetR0IfWait: returns VERR_INVALID_HANDLE\n"));
        return VERR_INVALID_HANDLE;
    }

    /* Snapshot the values; the event handle is compared against the live one after the wait. */
    const INTNETIFHANDLE    hIfSelf     = pIf->hIf;
    const RTSEMEVENT        hRecvEvent  = pIf->hRecvEvent;
    const bool              fDestroying = ASMAtomicReadBool(&pIf->fDestroying);
    if (    hIfSelf != hIf /* paranoia */
        ||  hRecvEvent == NIL_RTSEMEVENT
        ||  fDestroying
       )
    {
        /* NOTE(review): this path returns without calling intnetR0IfRelease,
           unlike the other entry points after a successful handle lookup.
           IntNetR0IfAbortWait(fNoMoreWaits=true) also sets fDestroying, so
           confirm whether this is intentional or a reference leak. */
        Log(("IntNetR0IfWait: returns VERR_SEM_DESTROYED\n"));
        return VERR_SEM_DESTROYED;
    }

    /*
     * It is tempting to check if there is data to be read here,
     * but the problem with such an approach is that it will cause
     * one unnecessary supervisor->user->supervisor trip.  There is
     * already a slight risk for such, so no need to increase it.
     */

    /*
     * Increment the number of waiters before starting the wait.
     * Upon wakeup we must assert reality, checking that we're not
     * already destroyed or in the process of being destroyed.  This
     * code must be aligned with the waiting code in intnetR0IfDestruct.
     */
    ASMAtomicIncU32(&pIf->cSleepers);
    int rc = RTSemEventWaitNoResume(hRecvEvent, cMillies);
    if (pIf->hRecvEvent == hRecvEvent)
    {
        ASMAtomicDecU32(&pIf->cSleepers);
        if (!pIf->fDestroying)
        {
            /* A true return from intnetR0IfRelease is reported as VERR_SEM_DESTROYED. */
            if (intnetR0IfRelease(pIf, pSession))
                rc = VERR_SEM_DESTROYED;
        }
        else
            rc = VERR_SEM_DESTROYED;
    }
    else
        rc = VERR_SEM_DESTROYED;
    Log4(("IntNetR0IfWait: returns %Rrc\n", rc));
    return rc;
}
4663
4664
4665/**
4666 * VMMR0 request wrapper for IntNetR0IfWait.
4667 *
4668 * @returns see IntNetR0IfWait.
4669 * @param pSession The caller's session.
4670 * @param pReq The request packet.
4671 */
4672INTNETR0DECL(int) IntNetR0IfWaitReq(PSUPDRVSESSION pSession, PINTNETIFWAITREQ pReq)
4673{
4674 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4675 return VERR_INVALID_PARAMETER;
4676 return IntNetR0IfWait(pReq->hIf, pSession, pReq->cMillies);
4677}
4678
4679
4680/**
4681 * Wake up any threads waiting on the interface.
4682 *
4683 * @returns VBox status code.
4684 * @param hIf The interface handle.
4685 * @param pSession The caller's session.
4686 * @param fNoMoreWaits When set, no more waits are permitted.
4687 */
4688INTNETR0DECL(int) IntNetR0IfAbortWait(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fNoMoreWaits)
4689{
4690 Log4(("IntNetR0IfAbortWait: hIf=%RX32 fNoMoreWaits=%RTbool\n", hIf, fNoMoreWaits));
4691
4692 /*
4693 * Get and validate essential handles.
4694 */
4695 PINTNET pIntNet = g_pIntNet;
4696 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4697 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4698
4699 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4700 if (!pIf)
4701 {
4702 Log(("IntNetR0IfAbortWait: returns VERR_INVALID_HANDLE\n"));
4703 return VERR_INVALID_HANDLE;
4704 }
4705
4706 const INTNETIFHANDLE hIfSelf = pIf->hIf;
4707 const RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4708 const bool fDestroying = ASMAtomicReadBool(&pIf->fDestroying);
4709 if ( hIfSelf != hIf /* paranoia */
4710 || hRecvEvent == NIL_RTSEMEVENT
4711 || fDestroying
4712 )
4713 {
4714 Log(("IntNetR0IfAbortWait: returns VERR_SEM_DESTROYED\n"));
4715 return VERR_SEM_DESTROYED;
4716 }
4717
4718 /*
4719 * Set fDestroying if requested to do so and then wake up all the sleeping
4720 * threads (usually just one). We leave the semaphore in the signalled
4721 * state so the next caller will return immediately.
4722 */
4723 if (fNoMoreWaits)
4724 ASMAtomicWriteBool(&pIf->fDestroying, true);
4725
4726 uint32_t cSleepers = ASMAtomicReadU32(&pIf->cSleepers) + 1;
4727 while (cSleepers-- > 0)
4728 {
4729 int rc = RTSemEventSignal(pIf->hRecvEvent);
4730 AssertRC(rc);
4731 }
4732
4733 Log4(("IntNetR0IfWait: returns %Rrc\n", VINF_SUCCESS));
4734 return VINF_SUCCESS;
4735}
4736
4737
4738/**
4739 * VMMR0 request wrapper for IntNetR0IfAbortWait.
4740 *
4741 * @returns see IntNetR0IfWait.
4742 * @param pSession The caller's session.
4743 * @param pReq The request packet.
4744 */
4745INTNETR0DECL(int) IntNetR0IfAbortWaitReq(PSUPDRVSESSION pSession, PINTNETIFABORTWAITREQ pReq)
4746{
4747 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4748 return VERR_INVALID_PARAMETER;
4749 return IntNetR0IfAbortWait(pReq->hIf, pSession, pReq->fNoMoreWaits);
4750}
4751
4752
4753/**
4754 * Close an interface.
4755 *
4756 * @returns VBox status code.
4757 * @param pIntNet The instance handle.
4758 * @param hIf The interface handle.
4759 * @param pSession The caller's session.
4760 */
4761INTNETR0DECL(int) IntNetR0IfClose(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession)
4762{
4763 LogFlow(("IntNetR0IfClose: hIf=%RX32\n", hIf));
4764
4765 /*
4766 * Validate and free the handle.
4767 */
4768 PINTNET pIntNet = g_pIntNet;
4769 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4770 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4771
4772 PINTNETIF pIf = (PINTNETIF)RTHandleTableFreeWithCtx(pIntNet->hHtIfs, hIf, pSession);
4773 if (!pIf)
4774 return VERR_INVALID_HANDLE;
4775
4776 /* Mark the handle as freed so intnetR0IfDestruct won't free it again. */
4777 ASMAtomicWriteU32(&pIf->hIf, INTNET_HANDLE_INVALID);
4778
4779 /*
4780 * Signal the event semaphore to wake up any threads in IntNetR0IfWait
4781 * and give them a moment to get out and release the interface.
4782 */
4783 uint32_t i = pIf->cSleepers;
4784 while (i-- > 0)
4785 {
4786 RTSemEventSignal(pIf->hRecvEvent);
4787 RTThreadYield();
4788 }
4789 RTSemEventSignal(pIf->hRecvEvent);
4790
4791 /*
4792 * Release the references to the interface object (handle + free lookup).
4793 */
4794 void *pvObj = pIf->pvObj;
4795 intnetR0IfRelease(pIf, pSession); /* (RTHandleTableFreeWithCtx) */
4796
4797 int rc = SUPR0ObjRelease(pvObj, pSession);
4798 LogFlow(("IntNetR0IfClose: returns %Rrc\n", rc));
4799 return rc;
4800}
4801
4802
4803/**
4804 * VMMR0 request wrapper for IntNetR0IfCloseReq.
4805 *
4806 * @returns see IntNetR0IfClose.
4807 * @param pSession The caller's session.
4808 * @param pReq The request packet.
4809 */
4810INTNETR0DECL(int) IntNetR0IfCloseReq(PSUPDRVSESSION pSession, PINTNETIFCLOSEREQ pReq)
4811{
4812 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4813 return VERR_INVALID_PARAMETER;
4814 return IntNetR0IfClose(pReq->hIf, pSession);
4815}
4816
4817
4818/**
4819 * Interface destructor callback.
4820 * This is called for reference counted objectes when the count reaches 0.
4821 *
4822 * @param pvObj The object pointer.
4823 * @param pvUser1 Pointer to the interface.
4824 * @param pvUser2 Pointer to the INTNET instance data.
4825 */
4826static DECLCALLBACK(void) intnetR0IfDestruct(void *pvObj, void *pvUser1, void *pvUser2)
4827{
4828 PINTNETIF pIf = (PINTNETIF)pvUser1;
4829 PINTNET pIntNet = (PINTNET)pvUser2;
4830 Log(("intnetR0IfDestruct: pvObj=%p pIf=%p pIntNet=%p hIf=%RX32\n", pvObj, pIf, pIntNet, pIf->hIf));
4831
4832 /*
4833 * We grab the INTNET create/open/destroy semaphore to make sure nobody is
4834 * adding or removing interface while we're in here. For paranoid reasons
4835 * we also mark the interface as destroyed here so any waiting threads can
4836 * take evasive action (theoretical case).
4837 */
4838 RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
4839 ASMAtomicWriteBool(&pIf->fDestroying, true);
4840
4841 /*
4842 * Delete the interface handle so the object no longer can be used.
4843 * (Can happen if the client didn't close its session.)
4844 */
4845 INTNETIFHANDLE hIf = ASMAtomicXchgU32(&pIf->hIf, INTNET_HANDLE_INVALID);
4846 if (hIf != INTNET_HANDLE_INVALID)
4847 {
4848 void *pvObj2 = RTHandleTableFreeWithCtx(pIntNet->hHtIfs, hIf, pIf->pSession); NOREF(pvObj2);
4849 AssertMsg(pvObj2 == pIf, ("%p, %p, hIf=%RX32 pSession=%p\n", pvObj2, pIf, hIf, pIf->pSession));
4850 }
4851
4852 /*
4853 * If we've got a network deactivate and detach ourselves from it. Because
4854 * of cleanup order we might have been orphaned by the network destructor.
4855 */
4856 PINTNETNETWORK pNetwork = pIf->pNetwork;
4857 if (pNetwork)
4858 {
4859 /* set inactive. */
4860 intnetR0NetworkSetIfActive(pNetwork, pIf, false /*fActive*/);
4861
4862 /* remove ourselves from the switch table. */
4863 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4864
4865 uint32_t iIf = pNetwork->MacTab.cEntries;
4866 while (iIf-- > 0)
4867 if (pNetwork->MacTab.paEntries[iIf].pIf == pIf)
4868 {
4869 if (pNetwork->MacTab.paEntries[iIf].fPromiscuousEff)
4870 {
4871 pNetwork->MacTab.cPromiscuousEntries--;
4872 if (!pNetwork->MacTab.paEntries[iIf].fPromiscuousSeeTrunk)
4873 pNetwork->MacTab.cPromiscuousNoTrunkEntries--;
4874 }
4875 Assert(pNetwork->MacTab.cPromiscuousEntries < pNetwork->MacTab.cEntries);
4876 Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries < pNetwork->MacTab.cEntries);
4877
4878 if (iIf + 1 < pNetwork->MacTab.cEntries)
4879 memmove(&pNetwork->MacTab.paEntries[iIf],
4880 &pNetwork->MacTab.paEntries[iIf + 1],
4881 (pNetwork->MacTab.cEntries - iIf - 1) * sizeof(pNetwork->MacTab.paEntries[0]));
4882 pNetwork->MacTab.cEntries--;
4883 break;
4884 }
4885
4886 /* recalc the min flags. */
4887 if (pIf->fOpenFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
4888 {
4889 uint32_t fMinFlags = 0;
4890 iIf = pNetwork->MacTab.cEntries;
4891 while (iIf-- > 0)
4892 {
4893 PINTNETIF pIf2 = pNetwork->MacTab.paEntries[iIf].pIf;
4894 if ( pIf2 /* paranoia */
4895 && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES))
4896 fMinFlags |= pIf2->fOpenFlags & INTNET_OPEN_FLAGS_STRICT_MASK;
4897 }
4898 pNetwork->fMinFlags = fMinFlags;
4899 }
4900
4901 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
4902
4903 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4904
4905 /* Notify the trunk about the interface being destroyed. */
4906 if (pTrunk && pTrunk->pIfPort)
4907 pTrunk->pIfPort->pfnDisconnectInterface(pTrunk->pIfPort, pIf->pvIfData);
4908
4909 /* Wait for the interface to quiesce while we still can. */
4910 intnetR0BusyWait(pNetwork, &pIf->cBusy);
4911
4912 /* Release our reference to the network. */
4913 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4914 pIf->pNetwork = NULL;
4915 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4916
4917 SUPR0ObjRelease(pNetwork->pvObj, pIf->pSession);
4918 }
4919
4920 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
4921
4922 /*
4923 * Wakeup anyone waiting on this interface.
4924 *
4925 * We *must* make sure they have woken up properly and realized
4926 * that the interface is no longer valid.
4927 */
4928 if (pIf->hRecvEvent != NIL_RTSEMEVENT)
4929 {
4930 RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4931 unsigned cMaxWait = 0x1000;
4932 while (pIf->cSleepers && cMaxWait-- > 0)
4933 {
4934 RTSemEventSignal(hRecvEvent);
4935 RTThreadYield();
4936 }
4937 if (pIf->cSleepers)
4938 {
4939 RTThreadSleep(1);
4940
4941 cMaxWait = pIf->cSleepers;
4942 while (pIf->cSleepers && cMaxWait-- > 0)
4943 {
4944 RTSemEventSignal(hRecvEvent);
4945 RTThreadSleep(10);
4946 }
4947 }
4948
4949 RTSemEventDestroy(hRecvEvent);
4950 pIf->hRecvEvent = NIL_RTSEMEVENT;
4951 }
4952
4953 /*
4954 * Unmap user buffer.
4955 */
4956 if (pIf->pIntBuf != pIf->pIntBufDefault)
4957 {
4958 /** @todo user buffer */
4959 }
4960
4961 /*
4962 * Unmap and Free the default buffer.
4963 */
4964 if (pIf->pIntBufDefault)
4965 {
4966 SUPR0MemFree(pIf->pSession, (RTHCUINTPTR)pIf->pIntBufDefault);
4967 pIf->pIntBufDefault = NULL;
4968 pIf->pIntBufDefaultR3 = 0;
4969 pIf->pIntBuf = NULL;
4970 pIf->pIntBufR3 = 0;
4971 }
4972
4973 /*
4974 * Free remaining resources
4975 */
4976 RTSpinlockDestroy(pIf->hRecvInSpinlock);
4977 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
4978
4979 RTMemFree(pIf->pDstTab);
4980 pIf->pDstTab = NULL;
4981
4982 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
4983 intnetR0IfAddrCacheDestroy(&pIf->aAddrCache[i]);
4984
4985 pIf->pvObj = NULL;
4986 RTMemFree(pIf);
4987}
4988
4989
4990/**
4991 * Creates a new network interface.
4992 *
4993 * The call must have opened the network for the new interface and is
4994 * responsible for closing it on failure. On success it must leave the network
4995 * opened so the interface destructor can close it.
4996 *
4997 * @returns VBox status code.
4998 * @param pNetwork The network, referenced. The reference is consumed on
4999 * success.
5000 * @param pSession The session handle.
5001 * @param cbSend The size of the send buffer.
5002 * @param cbRecv The size of the receive buffer.
5003 * @param fFlags The open network flags.
5004 * @param phIf Where to store the interface handle.
5005 */
5006static int intnetR0NetworkCreateIf(PINTNETNETWORK pNetwork, PSUPDRVSESSION pSession,
5007 unsigned cbSend, unsigned cbRecv, uint32_t fFlags,
5008 PINTNETIFHANDLE phIf)
5009{
5010 LogFlow(("intnetR0NetworkCreateIf: pNetwork=%p pSession=%p cbSend=%u cbRecv=%u fFlags=%#x phIf=%p\n",
5011 pNetwork, pSession, cbSend, cbRecv, fFlags, phIf));
5012
5013 /*
5014 * Assert input.
5015 */
5016 AssertPtr(pNetwork);
5017 AssertPtr(phIf);
5018
5019 /*
5020 * Adjust the flags with defaults for the interface policies.
5021 * Note: Main restricts promiscuous mode per interface.
5022 */
5023 uint32_t const fDefFlags = INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW
5024 | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK;
5025 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkIfFlags); i++)
5026 if (!(fFlags & g_afIntNetOpenNetworkIfFlags[i].fPair))
5027 fFlags |= g_afIntNetOpenNetworkIfFlags[i].fPair & fDefFlags;
5028
5029 /*
5030 * Make sure that all destination tables as well as the have space of
5031 */
5032 int rc = intnetR0NetworkEnsureTabSpace(pNetwork);
5033 if (RT_FAILURE(rc))
5034 return rc;
5035
5036 /*
5037 * Allocate the interface and initialize it.
5038 */
5039 PINTNETIF pIf = (PINTNETIF)RTMemAllocZ(sizeof(*pIf));
5040 if (!pIf)
5041 return VERR_NO_MEMORY;
5042
5043 memset(&pIf->MacAddr, 0xff, sizeof(pIf->MacAddr)); /* broadcast */
5044 //pIf->fMacSet = false;
5045 //pIf->fPromiscuousReal = false;
5046 //pIf->fActive = false;
5047 //pIf->fDestroying = false;
5048 pIf->fOpenFlags = fFlags;
5049 //pIf->cYields = 0;
5050 //pIf->pIntBuf = 0;
5051 //pIf->pIntBufR3 = NIL_RTR3PTR;
5052 //pIf->pIntBufDefault = 0;
5053 //pIf->pIntBufDefaultR3 = NIL_RTR3PTR;
5054 pIf->hRecvEvent = NIL_RTSEMEVENT;
5055 //pIf->cSleepers = 0;
5056 pIf->hIf = INTNET_HANDLE_INVALID;
5057 pIf->pNetwork = pNetwork;
5058 pIf->pSession = pSession;
5059 //pIf->pvObj = NULL;
5060 //pIf->aAddrCache = {0};
5061 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
5062 pIf->cBusy = 0;
5063 //pIf->pDstTab = NULL;
5064 //pIf->pvIfData = NULL;
5065
5066 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End && RT_SUCCESS(rc); i++)
5067 rc = intnetR0IfAddrCacheInit(&pIf->aAddrCache[i], (INTNETADDRTYPE)i,
5068 !!(pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE));
5069 if (RT_SUCCESS(rc))
5070 rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, (PINTNETDSTTAB *)&pIf->pDstTab);
5071 if (RT_SUCCESS(rc))
5072 rc = RTSemEventCreate((PRTSEMEVENT)&pIf->hRecvEvent);
5073 if (RT_SUCCESS(rc))
5074 rc = RTSpinlockCreate(&pIf->hRecvInSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hRecvInSpinlock");
5075 if (RT_SUCCESS(rc))
5076 {
5077 /*
5078 * Create the default buffer.
5079 */
5080 /** @todo adjust with minimums and apply defaults here. */
5081 cbRecv = RT_ALIGN(RT_MAX(cbRecv, sizeof(INTNETHDR) * 4), INTNETRINGBUF_ALIGNMENT);
5082 cbSend = RT_ALIGN(RT_MAX(cbSend, sizeof(INTNETHDR) * 4), INTNETRINGBUF_ALIGNMENT);
5083 const unsigned cbBuf = RT_ALIGN(sizeof(*pIf->pIntBuf), INTNETRINGBUF_ALIGNMENT) + cbRecv + cbSend;
5084 rc = SUPR0MemAlloc(pIf->pSession, cbBuf, (PRTR0PTR)&pIf->pIntBufDefault, (PRTR3PTR)&pIf->pIntBufDefaultR3);
5085 if (RT_SUCCESS(rc))
5086 {
5087 ASMMemZero32(pIf->pIntBufDefault, cbBuf); /** @todo I thought I specified these buggers as clearing the memory... */
5088
5089 pIf->pIntBuf = pIf->pIntBufDefault;
5090 pIf->pIntBufR3 = pIf->pIntBufDefaultR3;
5091 IntNetBufInit(pIf->pIntBuf, cbBuf, cbRecv, cbSend);
5092
5093 /*
5094 * Register the interface with the session and create a handle for it.
5095 */
5096 pIf->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_INTERNAL_NETWORK_INTERFACE,
5097 intnetR0IfDestruct, pIf, pNetwork->pIntNet);
5098 if (pIf->pvObj)
5099 {
5100 rc = RTHandleTableAllocWithCtx(pNetwork->pIntNet->hHtIfs, pIf, pSession, (uint32_t *)&pIf->hIf);
5101 if (RT_SUCCESS(rc))
5102 {
5103 /*
5104 * Finally add the interface to the network, consuming the
5105 * network reference of the caller.
5106 */
5107 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5108
5109 uint32_t iIf = pNetwork->MacTab.cEntries;
5110 Assert(iIf + 1 <= pNetwork->MacTab.cEntriesAllocated);
5111
5112 pNetwork->MacTab.paEntries[iIf].MacAddr = pIf->MacAddr;
5113 pNetwork->MacTab.paEntries[iIf].fActive = false;
5114 pNetwork->MacTab.paEntries[iIf].fPromiscuousEff = false;
5115 pNetwork->MacTab.paEntries[iIf].fPromiscuousSeeTrunk = false;
5116 pNetwork->MacTab.paEntries[iIf].pIf = pIf;
5117
5118 pNetwork->MacTab.cEntries = iIf + 1;
5119 pIf->pNetwork = pNetwork;
5120
5121 /*
5122 * Grab a busy reference (paranoia) to the trunk before releasing
5123 * the spinlock and then notify it about the new interface.
5124 */
5125 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
5126 if (pTrunk)
5127 intnetR0BusyIncTrunk(pTrunk);
5128
5129 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5130
5131 if (pTrunk)
5132 {
5133 Log(("intnetR0NetworkCreateIf: pfnConnectInterface hIf=%RX32\n", pIf->hIf));
5134 if (pTrunk->pIfPort)
5135 rc = pTrunk->pIfPort->pfnConnectInterface(pTrunk->pIfPort, pIf, &pIf->pvIfData);
5136 intnetR0BusyDecTrunk(pTrunk);
5137 }
5138 if (RT_SUCCESS(rc))
5139 {
5140 /*
5141 * We're good!
5142 */
5143 *phIf = pIf->hIf;
5144 Log(("intnetR0NetworkCreateIf: returns VINF_SUCCESS *phIf=%RX32 cbSend=%u cbRecv=%u cbBuf=%u\n",
5145 *phIf, pIf->pIntBufDefault->cbSend, pIf->pIntBufDefault->cbRecv, pIf->pIntBufDefault->cbBuf));
5146 return VINF_SUCCESS;
5147 }
5148 }
5149
5150 SUPR0ObjAddRef(pNetwork->pvObj, pSession);
5151 SUPR0ObjRelease(pIf->pvObj, pSession);
5152 LogFlow(("intnetR0NetworkCreateIf: returns %Rrc\n", rc));
5153 return rc;
5154 }
5155
5156 /* clean up */
5157 SUPR0MemFree(pIf->pSession, (RTHCUINTPTR)pIf->pIntBufDefault);
5158 pIf->pIntBufDefault = NULL;
5159 pIf->pIntBuf = NULL;
5160 }
5161 }
5162
5163 RTSpinlockDestroy(pIf->hRecvInSpinlock);
5164 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
5165 RTSemEventDestroy(pIf->hRecvEvent);
5166 pIf->hRecvEvent = NIL_RTSEMEVENT;
5167 RTMemFree(pIf->pDstTab);
5168 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
5169 intnetR0IfAddrCacheDestroy(&pIf->aAddrCache[i]);
5170 RTMemFree(pIf);
5171 LogFlow(("intnetR0NetworkCreateIf: returns %Rrc\n", rc));
5172 return rc;
5173}
5174
5175
5176/** @copydoc INTNETTRUNKSWPORT::pfnSetSGPhys */
5177static DECLCALLBACK(bool) intnetR0TrunkIfPortSetSGPhys(PINTNETTRUNKSWPORT pSwitchPort, bool fEnable)
5178{
5179 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5180 AssertMsgFailed(("Not implemented because it wasn't required on Darwin\n"));
5181 return ASMAtomicXchgBool(&pThis->fPhysSG, fEnable);
5182}
5183
5184
5185/** @copydoc INTNETTRUNKSWPORT::pfnReportMacAddress */
5186static DECLCALLBACK(void) intnetR0TrunkIfPortReportMacAddress(PINTNETTRUNKSWPORT pSwitchPort, PCRTMAC pMacAddr)
5187{
5188 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5189
5190 /*
5191 * Get the network instance and grab the address spinlock before making
5192 * any changes.
5193 */
5194 intnetR0BusyIncTrunk(pThis);
5195 PINTNETNETWORK pNetwork = pThis->pNetwork;
5196 if (pNetwork)
5197 {
5198 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5199
5200 pNetwork->MacTab.HostMac = *pMacAddr;
5201 pThis->MacAddr = *pMacAddr;
5202
5203 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5204 }
5205 else
5206 pThis->MacAddr = *pMacAddr;
5207 intnetR0BusyDecTrunk(pThis);
5208}
5209
5210
5211/** @copydoc INTNETTRUNKSWPORT::pfnReportPromiscuousMode */
5212static DECLCALLBACK(void) intnetR0TrunkIfPortReportPromiscuousMode(PINTNETTRUNKSWPORT pSwitchPort, bool fPromiscuous)
5213{
5214 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5215
5216 /*
5217 * Get the network instance and grab the address spinlock before making
5218 * any changes.
5219 */
5220 intnetR0BusyIncTrunk(pThis);
5221 PINTNETNETWORK pNetwork = pThis->pNetwork;
5222 if (pNetwork)
5223 {
5224 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5225
5226 pNetwork->MacTab.fHostPromiscuousReal = fPromiscuous
5227 || (pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE);
5228 pNetwork->MacTab.fHostPromiscuousEff = pNetwork->MacTab.fHostPromiscuousReal
5229 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
5230
5231 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5232 }
5233 intnetR0BusyDecTrunk(pThis);
5234}
5235
5236
5237/** @copydoc INTNETTRUNKSWPORT::pfnReportGsoCapabilities */
5238static DECLCALLBACK(void) intnetR0TrunkIfPortReportGsoCapabilities(PINTNETTRUNKSWPORT pSwitchPort,
5239 uint32_t fGsoCapabilities, uint32_t fDst)
5240{
5241 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5242
5243 for (unsigned iBit = PDMNETWORKGSOTYPE_END; iBit < 32; iBit++)
5244 Assert(!(fGsoCapabilities & RT_BIT_32(iBit)));
5245 Assert(!(fDst & ~INTNETTRUNKDIR_VALID_MASK));
5246 Assert(fDst);
5247
5248 if (fDst & INTNETTRUNKDIR_HOST)
5249 pThis->fHostGsoCapabilites = fGsoCapabilities;
5250
5251 if (fDst & INTNETTRUNKDIR_WIRE)
5252 pThis->fWireGsoCapabilites = fGsoCapabilities;
5253}
5254
5255
5256/** @copydoc INTNETTRUNKSWPORT::pfnReportNoPreemptDsts */
5257static DECLCALLBACK(void) intnetR0TrunkIfPortReportNoPreemptDsts(PINTNETTRUNKSWPORT pSwitchPort, uint32_t fNoPreemptDsts)
5258{
5259 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5260 Assert(!(fNoPreemptDsts & ~INTNETTRUNKDIR_VALID_MASK));
5261
5262 pThis->fNoPreemptDsts = fNoPreemptDsts;
5263}
5264
5265
5266/** @copydoc INTNETTRUNKSWPORT::pfnDisconnect */
5267static DECLCALLBACK(void) intnetR0TrunkIfPortDisconnect(PINTNETTRUNKSWPORT pSwitchPort, PINTNETTRUNKIFPORT pIfPort,
5268 PFNINTNETTRUNKIFPORTRELEASEBUSY pfnReleaseBusy)
5269{
5270 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5271
5272 /*
5273 * The caller has marked the trunk instance busy on his side before making
5274 * the call (see method docs) to let us safely grab the network and internal
5275 * network instance pointers without racing the network destruction code
5276 * (intnetR0TrunkIfDestroy (called by intnetR0TrunkIfDestroy) will wait for
5277 * the interface to stop being busy before setting pNetwork to NULL and
5278 * freeing up the resources).
5279 */
5280 PINTNETNETWORK pNetwork = pThis->pNetwork;
5281 if (pNetwork)
5282 {
5283 PINTNET pIntNet = pNetwork->pIntNet;
5284 Assert(pNetwork->pIntNet);
5285
5286 /*
5287 * We must decrease the callers busy count here to prevent deadlocking
5288 * when requesting the big mutex ownership. This will of course
5289 * unblock anyone stuck in intnetR0TrunkIfDestroy doing pfnWaitForIdle
5290 * (the other deadlock party), so we have to revalidate the network
5291 * pointer after taking ownership of the big mutex.
5292 */
5293 pfnReleaseBusy(pIfPort);
5294
5295 RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
5296
5297 if (intnetR0NetworkIsValid(pIntNet, pNetwork))
5298 {
5299 Assert(pNetwork->MacTab.pTrunk == pThis); /* Must be valid as long as tehre are no concurrent calls to this method. */
5300 Assert(pThis->pIfPort == pIfPort); /* Ditto */
5301
5302 /*
5303 * Disconnect the trunk and destroy it, similar to what is done int
5304 * intnetR0NetworkDestruct.
5305 */
5306 pIfPort->pfnSetState(pIfPort, INTNETTRUNKIFSTATE_DISCONNECTING);
5307
5308 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5309 pNetwork->MacTab.pTrunk = NULL;
5310 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5311
5312 intnetR0TrunkIfDestroy(pThis, pNetwork);
5313 }
5314
5315 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
5316 }
5317 /*
5318 * We must always release the busy reference.
5319 */
5320 else
5321 pfnReleaseBusy(pIfPort);
5322}
5323
5324
5325/** @copydoc INTNETTRUNKSWPORT::pfnPreRecv */
5326static DECLCALLBACK(INTNETSWDECISION) intnetR0TrunkIfPortPreRecv(PINTNETTRUNKSWPORT pSwitchPort,
5327 void const *pvSrc, size_t cbSrc, uint32_t fSrc)
5328{
5329 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5330
5331 /* assert some sanity */
5332 AssertPtr(pvSrc);
5333 AssertReturn(cbSrc >= 6, INTNETSWDECISION_BROADCAST);
5334 Assert(fSrc);
5335
5336 /*
5337 * Mark the trunk as busy, make sure we've got a network and that there are
5338 * some active interfaces around.
5339 */
5340 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_TRUNK;
5341 intnetR0BusyIncTrunk(pThis);
5342 PINTNETNETWORK pNetwork = pThis->pNetwork;
5343 if (RT_LIKELY( pNetwork
5344 && pNetwork->cActiveIFs > 0 ))
5345 {
5346 /*
5347 * Lazy bird! No pre-switching of multicast and shared-MAC-on-wire.
5348 */
5349 PCRTNETETHERHDR pEthHdr = (PCRTNETETHERHDR)pvSrc;
5350 if (intnetR0IsMacAddrMulticast(&pEthHdr->DstMac))
5351 enmSwDecision = INTNETSWDECISION_BROADCAST;
5352 else if ( fSrc == INTNETTRUNKDIR_WIRE
5353 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE))
5354 enmSwDecision = INTNETSWDECISION_BROADCAST;
5355 else
5356 enmSwDecision = intnetR0NetworkPreSwitchUnicast(pNetwork,
5357 fSrc,
5358 cbSrc >= 12 ? &pEthHdr->SrcMac : NULL,
5359 &pEthHdr->DstMac);
5360 }
5361
5362 intnetR0BusyDecTrunk(pThis);
5363 return enmSwDecision;
5364}
5365
5366
5367/** @copydoc INTNETTRUNKSWPORT::pfnRecv */
5368static DECLCALLBACK(bool) intnetR0TrunkIfPortRecv(PINTNETTRUNKSWPORT pSwitchPort, void *pvIf, PINTNETSG pSG, uint32_t fSrc)
5369{
5370 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5371
5372 /* assert some sanity */
5373 AssertPtr(pSG);
5374 Assert(fSrc);
5375 NOREF(pvIf); /* later */
5376
5377 /*
5378 * Mark the trunk as busy, make sure we've got a network and that there are
5379 * some active interfaces around.
5380 */
5381 bool fRc = false /* don't drop it */;
5382 intnetR0BusyIncTrunk(pThis);
5383 PINTNETNETWORK pNetwork = pThis->pNetwork;
5384 if (RT_LIKELY( pNetwork
5385 && pNetwork->cActiveIFs > 0 ))
5386 {
5387 /*
5388 * Grab or allocate a destination table.
5389 */
5390 bool const fIntCtx = RTThreadPreemptIsEnabled(NIL_RTTHREAD) || RTThreadIsInInterrupt(NIL_RTTHREAD);
5391 unsigned iDstTab = 0;
5392 PINTNETDSTTAB pDstTab = NULL;
5393 RTSpinlockAcquire(pThis->hDstTabSpinlock);
5394 if (fIntCtx)
5395 {
5396 /* Interrupt or restricted context. */
5397 iDstTab = RTMpCpuIdToSetIndex(RTMpCpuId());
5398 iDstTab %= pThis->cIntDstTabs;
5399 pDstTab = pThis->apIntDstTabs[iDstTab];
5400 if (RT_LIKELY(pDstTab))
5401 pThis->apIntDstTabs[iDstTab] = NULL;
5402 else
5403 {
5404 iDstTab = pThis->cIntDstTabs;
5405 while (iDstTab-- > 0)
5406 {
5407 pDstTab = pThis->apIntDstTabs[iDstTab];
5408 if (pDstTab)
5409 {
5410 pThis->apIntDstTabs[iDstTab] = NULL;
5411 break;
5412 }
5413 }
5414 }
5415 RTSpinlockRelease(pThis->hDstTabSpinlock);
5416 Assert(!pDstTab || iDstTab < pThis->cIntDstTabs);
5417 }
5418 else
5419 {
5420 /* Task context, fallback is to allocate a table. */
5421 AssertCompile(RT_ELEMENTS(pThis->apTaskDstTabs) == 2); /* for loop rollout */
5422 pDstTab = pThis->apIntDstTabs[iDstTab = 0];
5423 if (!pDstTab)
5424 pDstTab = pThis->apIntDstTabs[iDstTab = 1];
5425 if (pDstTab)
5426 {
5427 pThis->apIntDstTabs[iDstTab] = NULL;
5428 RTSpinlockRelease(pThis->hDstTabSpinlock);
5429 Assert(iDstTab < RT_ELEMENTS(pThis->apTaskDstTabs));
5430 }
5431 else
5432 {
5433 RTSpinlockRelease(pThis->hDstTabSpinlock);
5434 intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pDstTab);
5435 iDstTab = 65535;
5436 }
5437 }
5438 if (RT_LIKELY(pDstTab))
5439 {
5440 /*
5441 * Finally, get down to business of sending the frame.
5442 */
5443 INTNETSWDECISION enmSwDecision = intnetR0NetworkSend(pNetwork, NULL, fSrc, pSG, pDstTab);
5444 AssertMsg(enmSwDecision != INTNETSWDECISION_BAD_CONTEXT, ("fSrc=%#x fTrunkDst=%#x hdr=%.14Rhxs\n", fSrc, pDstTab->fTrunkDst, pSG->aSegs[0].pv));
5445 if (enmSwDecision == INTNETSWDECISION_INTNET)
5446 fRc = true; /* drop it */
5447
5448 /*
5449 * Free the destination table.
5450 */
5451 if (iDstTab == 65535)
5452 RTMemFree(pDstTab);
5453 else
5454 {
5455 RTSpinlockAcquire(pThis->hDstTabSpinlock);
5456 if (fIntCtx && !pThis->apIntDstTabs[iDstTab])
5457 pThis->apIntDstTabs[iDstTab] = pDstTab;
5458 else if (!fIntCtx && !pThis->apTaskDstTabs[iDstTab])
5459 pThis->apTaskDstTabs[iDstTab] = pDstTab;
5460 else
5461 {
5462 /* this shouldn't happen! */
5463 PINTNETDSTTAB *papDstTabs = fIntCtx ? &pThis->apIntDstTabs[0] : &pThis->apTaskDstTabs[0];
5464 iDstTab = fIntCtx ? pThis->cIntDstTabs : RT_ELEMENTS(pThis->apTaskDstTabs);
5465 while (iDstTab-- > 0)
5466 if (!papDstTabs[iDstTab])
5467 {
5468 papDstTabs[iDstTab] = pDstTab;
5469 break;
5470 }
5471 }
5472 RTSpinlockRelease(pThis->hDstTabSpinlock);
5473 Assert(iDstTab < RT_MAX(RT_ELEMENTS(pThis->apTaskDstTabs), pThis->cIntDstTabs));
5474 }
5475 }
5476 }
5477
5478 intnetR0BusyDecTrunk(pThis);
5479 return fRc;
5480}
5481
5482
5483/** @copydoc INTNETTRUNKSWPORT::pfnSGRetain */
5484static DECLCALLBACK(void) intnetR0TrunkIfPortSGRetain(PINTNETTRUNKSWPORT pSwitchPort, PINTNETSG pSG)
5485{
5486 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5487 PINTNETNETWORK pNetwork = pThis->pNetwork;
5488
5489 /* assert some sanity */
5490 AssertPtrReturnVoid(pNetwork);
5491 AssertReturnVoid(pNetwork->hEvtBusyIf != NIL_RTSEMEVENT);
5492 AssertPtr(pSG);
5493 Assert(pSG->cUsers > 0 && pSG->cUsers < 256);
5494
5495 /* do it. */
5496 ++pSG->cUsers;
5497}
5498
5499
5500/** @copydoc INTNETTRUNKSWPORT::pfnSGRelease */
5501static DECLCALLBACK(void) intnetR0TrunkIfPortSGRelease(PINTNETTRUNKSWPORT pSwitchPort, PINTNETSG pSG)
5502{
5503 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5504 PINTNETNETWORK pNetwork = pThis->pNetwork;
5505
5506 /* assert some sanity */
5507 AssertPtrReturnVoid(pNetwork);
5508 AssertReturnVoid(pNetwork->hEvtBusyIf != NIL_RTSEMEVENT);
5509 AssertPtr(pSG);
5510 Assert(pSG->cUsers > 0);
5511
5512 /*
5513 * Free it?
5514 */
5515 if (!--pSG->cUsers)
5516 {
5517 /** @todo later */
5518 }
5519}
5520
5521
5522/** @copydoc INTNETTRUNKSWPORT::pfnNotifyHostAddress */
5523static DECLCALLBACK(void) intnetR0NetworkNotifyHostAddress(PINTNETTRUNKSWPORT pSwitchPort,
5524 bool fAdded,
5525 INTNETADDRTYPE enmType, const void *pvAddr)
5526{
5527 PINTNETTRUNKIF pTrunkIf = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5528 PINTNETNETWORK pNetwork = pTrunkIf->pNetwork;
5529 PCRTNETADDRU pAddr = (PCRTNETADDRU)pvAddr;
5530 uint8_t cbAddr;
5531
5532 if (enmType == kIntNetAddrType_IPv4)
5533 {
5534 Log(("%s: %s %RTnaipv4\n",
5535 __FUNCTION__, (fAdded ? "add" : "del"),
5536 pAddr->IPv4));
5537 cbAddr = 4;
5538 }
5539 else if (enmType == kIntNetAddrType_IPv6)
5540 {
5541 Log(("%s: %s %RTnaipv6\n",
5542 __FUNCTION__, (fAdded ? "add" : "del"),
5543 pAddr));
5544 cbAddr = 16;
5545 }
5546 else
5547 {
5548 Log(("%s: unexpected address type %d\n", __FUNCTION__, enmType));
5549 return;
5550 }
5551
5552 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5553 if (fAdded) /* one of host interfaces got a new address */
5554 {
5555 /* blacklist it to prevent spoofing by guests */
5556 intnetR0NetworkBlacklistAdd(pNetwork, pAddr, enmType);
5557
5558 /* kick out any guest that uses it */
5559 intnetR0NetworkAddrCacheDeleteLocked(pNetwork, pAddr, enmType, cbAddr, "tif/host");
5560 }
5561 else /* address deleted from one of host interfaces */
5562 {
5563 /* stop blacklisting it, guests may use it now */
5564 intnetR0NetworkBlacklistDelete(pNetwork, pAddr, enmType);
5565 }
5566 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5567}
5568
5569
5570/**
5571 * Shutdown the trunk interface.
5572 *
5573 * @param pThis The trunk.
5574 * @param pNetworks The network.
5575 *
5576 * @remarks The caller must hold the global lock.
5577 */
5578static void intnetR0TrunkIfDestroy(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork)
5579{
5580 /* assert sanity */
5581 if (!pThis)
5582 return;
5583 AssertPtr(pThis);
5584 Assert(pThis->pNetwork == pNetwork);
5585 AssertPtrNull(pThis->pIfPort);
5586
5587 /*
5588 * The interface has already been deactivated, we just to wait for
5589 * it to become idle before we can disconnect and release it.
5590 */
5591 PINTNETTRUNKIFPORT pIfPort = pThis->pIfPort;
5592 if (pIfPort)
5593 {
5594 /* unset it */
5595 pThis->pIfPort = NULL;
5596
5597 /* wait in portions so we can complain every now an then. */
5598 uint64_t StartTS = RTTimeSystemNanoTS();
5599 int rc = pIfPort->pfnWaitForIdle(pIfPort, 10*1000);
5600 if (RT_FAILURE(rc))
5601 {
5602 LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc).\n",
5603 pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
5604 Assert(rc == VERR_TIMEOUT);
5605 while ( RT_FAILURE(rc)
5606 && RTTimeSystemNanoTS() - StartTS < UINT64_C(30000000000)) /* 30 sec */
5607 rc = pIfPort->pfnWaitForIdle(pIfPort, 10*1000);
5608 if (rc == VERR_TIMEOUT)
5609 {
5610 LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc).\n",
5611 pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
5612 while ( rc == VERR_TIMEOUT
5613 && RTTimeSystemNanoTS() - StartTS < UINT64_C(360000000000)) /* 360 sec */
5614 rc = pIfPort->pfnWaitForIdle(pIfPort, 30*1000);
5615 if (RT_FAILURE(rc))
5616 {
5617 LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc), giving up.\n",
5618 pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
5619 AssertRC(rc);
5620 }
5621 }
5622 }
5623
5624 /* disconnect & release it. */
5625 pIfPort->pfnDisconnectAndRelease(pIfPort);
5626 }
5627
5628 /*
5629 * Free up the resources.
5630 */
5631 pThis->pNetwork = NULL; /* Must not be cleared while busy, see intnetR0TrunkIfPortDisconnect. */
5632 RTSpinlockDestroy(pThis->hDstTabSpinlock);
5633 for (unsigned i = 0; i < RT_ELEMENTS(pThis->apTaskDstTabs); i++)
5634 {
5635 Assert(pThis->apTaskDstTabs[i]);
5636 RTMemFree(pThis->apTaskDstTabs[i]);
5637 pThis->apTaskDstTabs[i] = NULL;
5638 }
5639 for (unsigned i = 0; i < pThis->cIntDstTabs; i++)
5640 {
5641 Assert(pThis->apIntDstTabs[i]);
5642 RTMemFree(pThis->apIntDstTabs[i]);
5643 pThis->apIntDstTabs[i] = NULL;
5644 }
5645 RTMemFree(pThis);
5646}
5647
5648
5649/**
5650 * Creates the trunk connection (if any).
5651 *
5652 * @returns VBox status code.
5653 *
5654 * @param pNetwork The newly created network.
5655 * @param pSession The session handle.
5656 */
5657static int intnetR0NetworkCreateTrunkIf(PINTNETNETWORK pNetwork, PSUPDRVSESSION pSession)
5658{
5659 const char *pszName;
5660 switch (pNetwork->enmTrunkType)
5661 {
5662 /*
5663 * The 'None' case, simple.
5664 */
5665 case kIntNetTrunkType_None:
5666 case kIntNetTrunkType_WhateverNone:
5667#ifdef VBOX_WITH_NAT_SERVICE
5668 /*
5669 * Well, here we don't want load anything special,
5670 * just communicate between processes via internal network.
5671 */
5672 case kIntNetTrunkType_SrvNat:
5673#endif
5674 return VINF_SUCCESS;
5675
5676 /* Can't happen, but makes GCC happy. */
5677 default:
5678 return VERR_NOT_IMPLEMENTED;
5679
5680 /*
5681 * Translate enum to component factory name.
5682 */
5683 case kIntNetTrunkType_NetFlt:
5684 pszName = "VBoxNetFlt";
5685 break;
5686 case kIntNetTrunkType_NetAdp:
5687#if defined(RT_OS_DARWIN) && !defined(VBOXNETADP_DO_NOT_USE_NETFLT)
5688 pszName = "VBoxNetFlt";
5689#else /* VBOXNETADP_DO_NOT_USE_NETFLT */
5690 pszName = "VBoxNetAdp";
5691#endif /* VBOXNETADP_DO_NOT_USE_NETFLT */
5692 break;
5693#ifndef VBOX_WITH_NAT_SERVICE
5694 case kIntNetTrunkType_SrvNat:
5695 pszName = "VBoxSrvNat";
5696 break;
5697#endif
5698 }
5699
5700 /*
5701 * Allocate the trunk interface and associated destination tables.
5702 *
5703 * We take a very optimistic view on the parallelism of the host
5704 * network stack and NIC driver. So, we allocate one table for each
5705 * possible CPU to deal with interrupt time requests and one for task
5706 * time calls.
5707 */
5708 RTCPUID cCpus = RTMpGetCount(); Assert(cCpus > 0);
5709 PINTNETTRUNKIF pTrunk = (PINTNETTRUNKIF)RTMemAllocZ(RT_OFFSETOF(INTNETTRUNKIF, apIntDstTabs[cCpus]));
5710 if (!pTrunk)
5711 return VERR_NO_MEMORY;
5712
5713 Assert(pNetwork->MacTab.cEntriesAllocated > 0);
5714 int rc = VINF_SUCCESS;
5715 pTrunk->cIntDstTabs = cCpus;
5716 for (unsigned i = 0; i < cCpus && RT_SUCCESS(rc); i++)
5717 rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pTrunk->apIntDstTabs[i]);
5718 for (unsigned i = 0; i < RT_ELEMENTS(pTrunk->apTaskDstTabs) && RT_SUCCESS(rc); i++)
5719 rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pTrunk->apTaskDstTabs[i]);
5720
5721 if (RT_SUCCESS(rc))
5722 {
5723 pTrunk->SwitchPort.u32Version = INTNETTRUNKSWPORT_VERSION;
5724 pTrunk->SwitchPort.pfnPreRecv = intnetR0TrunkIfPortPreRecv;
5725 pTrunk->SwitchPort.pfnRecv = intnetR0TrunkIfPortRecv;
5726 pTrunk->SwitchPort.pfnSGRetain = intnetR0TrunkIfPortSGRetain;
5727 pTrunk->SwitchPort.pfnSGRelease = intnetR0TrunkIfPortSGRelease;
5728 pTrunk->SwitchPort.pfnSetSGPhys = intnetR0TrunkIfPortSetSGPhys;
5729 pTrunk->SwitchPort.pfnReportMacAddress = intnetR0TrunkIfPortReportMacAddress;
5730 pTrunk->SwitchPort.pfnReportPromiscuousMode = intnetR0TrunkIfPortReportPromiscuousMode;
5731 pTrunk->SwitchPort.pfnReportGsoCapabilities = intnetR0TrunkIfPortReportGsoCapabilities;
5732 pTrunk->SwitchPort.pfnReportNoPreemptDsts = intnetR0TrunkIfPortReportNoPreemptDsts;
5733 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
5734 pTrunk->SwitchPort.pfnNotifyHostAddress = intnetR0NetworkNotifyHostAddress;
5735 pTrunk->SwitchPort.pfnDisconnect = intnetR0TrunkIfPortDisconnect;
5736 pTrunk->SwitchPort.u32VersionEnd = INTNETTRUNKSWPORT_VERSION;
5737 //pTrunk->pIfPort = NULL;
5738 pTrunk->pNetwork = pNetwork;
5739 pTrunk->MacAddr.au8[0] = 0xff;
5740 pTrunk->MacAddr.au8[1] = 0xff;
5741 pTrunk->MacAddr.au8[2] = 0xff;
5742 pTrunk->MacAddr.au8[3] = 0xff;
5743 pTrunk->MacAddr.au8[4] = 0xff;
5744 pTrunk->MacAddr.au8[5] = 0xff;
5745 //pTrunk->fPhysSG = false;
5746 //pTrunk->fUnused = false;
5747 //pTrunk->cBusy = 0;
5748 //pTrunk->fNoPreemptDsts = 0;
5749 //pTrunk->fWireGsoCapabilites = 0;
5750 //pTrunk->fHostGsoCapabilites = 0;
5751 //pTrunk->abGsoHdrs = {0};
5752 pTrunk->hDstTabSpinlock = NIL_RTSPINLOCK;
5753 //pTrunk->apTaskDstTabs = above;
5754 //pTrunk->cIntDstTabs = above;
5755 //pTrunk->apIntDstTabs = above;
5756
5757 /*
5758 * Create the lock (we've NIL'ed the members above to simplify cleanup).
5759 */
5760 rc = RTSpinlockCreate(&pTrunk->hDstTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hDstTabSpinlock");
5761 if (RT_SUCCESS(rc))
5762 {
5763 /*
5764 * There are a couple of bits in MacTab as well pertaining to the
5765 * trunk. We have to set this before it's reported.
5766 *
5767 * Note! We don't need to lock the MacTab here - creation time.
5768 */
5769 pNetwork->MacTab.pTrunk = pTrunk;
5770 pNetwork->MacTab.HostMac = pTrunk->MacAddr;
5771 pNetwork->MacTab.fHostPromiscuousReal = false;
5772 pNetwork->MacTab.fHostPromiscuousEff = (pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE)
5773 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
5774 pNetwork->MacTab.fHostActive = false;
5775 pNetwork->MacTab.fWirePromiscuousReal = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE);
5776 pNetwork->MacTab.fWirePromiscuousEff = pNetwork->MacTab.fWirePromiscuousReal
5777 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE);
5778 pNetwork->MacTab.fWireActive = false;
5779
5780#ifdef IN_RING0 /* (testcase is ring-3) */
5781 /*
5782 * Query the factory we want, then use it create and connect the trunk.
5783 */
5784 PINTNETTRUNKFACTORY pTrunkFactory = NULL;
5785 rc = SUPR0ComponentQueryFactory(pSession, pszName, INTNETTRUNKFACTORY_UUID_STR, (void **)&pTrunkFactory);
5786 if (RT_SUCCESS(rc))
5787 {
5788 rc = pTrunkFactory->pfnCreateAndConnect(pTrunkFactory,
5789 pNetwork->szTrunk,
5790 &pTrunk->SwitchPort,
5791 pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE
5792 ? INTNETTRUNKFACTORY_FLAG_NO_PROMISC
5793 : 0,
5794 &pTrunk->pIfPort);
5795 pTrunkFactory->pfnRelease(pTrunkFactory);
5796 if (RT_SUCCESS(rc))
5797 {
5798 Assert(pTrunk->pIfPort);
5799
5800 Log(("intnetR0NetworkCreateTrunkIf: VINF_SUCCESS - pszName=%s szTrunk=%s%s Network=%s\n",
5801 pszName, pNetwork->szTrunk, pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE ? " shared-mac" : "", pNetwork->szName));
5802 return VINF_SUCCESS;
5803 }
5804 }
5805#else /* IN_RING3 */
5806 NOREF(pSession);
5807 rc = VERR_NOT_SUPPORTED;
5808#endif /* IN_RING3 */
5809
5810 pNetwork->MacTab.pTrunk = NULL;
5811 }
5812
5813 /* bail out and clean up. */
5814 RTSpinlockDestroy(pTrunk->hDstTabSpinlock);
5815 }
5816
5817 for (unsigned i = 0; i < RT_ELEMENTS(pTrunk->apTaskDstTabs); i++)
5818 RTMemFree(pTrunk->apTaskDstTabs[i]);
5819 for (unsigned i = 0; i < pTrunk->cIntDstTabs; i++)
5820 RTMemFree(pTrunk->apIntDstTabs[i]);
5821 RTMemFree(pTrunk);
5822
5823 LogFlow(("intnetR0NetworkCreateTrunkIf: %Rrc - pszName=%s szTrunk=%s Network=%s\n",
5824 rc, pszName, pNetwork->szTrunk, pNetwork->szName));
5825 return rc;
5826}
5827
5828
5829
5830/**
5831 * Object destructor callback.
5832 * This is called for reference counted objectes when the count reaches 0.
5833 *
5834 * @param pvObj The object pointer.
5835 * @param pvUser1 Pointer to the network.
5836 * @param pvUser2 Pointer to the INTNET instance data.
5837 */
5838static DECLCALLBACK(void) intnetR0NetworkDestruct(void *pvObj, void *pvUser1, void *pvUser2)
5839{
5840 PINTNETNETWORK pNetwork = (PINTNETNETWORK)pvUser1;
5841 PINTNET pIntNet = (PINTNET)pvUser2;
5842 Log(("intnetR0NetworkDestruct: pvObj=%p pNetwork=%p pIntNet=%p %s\n", pvObj, pNetwork, pIntNet, pNetwork->szName));
5843 Assert(pNetwork->pIntNet == pIntNet);
5844
5845 /* Take the big create/open/destroy sem. */
5846 RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
5847
5848 /*
5849 * Tell the trunk, if present, that we're about to disconnect it and wish
5850 * no further calls from it.
5851 */
5852 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
5853 if (pTrunk)
5854 pTrunk->pIfPort->pfnSetState(pTrunk->pIfPort, INTNETTRUNKIFSTATE_DISCONNECTING);
5855
5856 /*
5857 * Deactivate and orphan any remaining interfaces and wait for them to idle.
5858 *
5859 * Note! Normally there are no more interfaces at this point, however, when
5860 * supdrvCloseSession / supdrvCleanupSession release the objects the
5861 * order is undefined. So, it's quite possible that the network will
5862 * be dereference and destroyed before the interfaces.
5863 */
5864 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5865
5866 uint32_t iIf = pNetwork->MacTab.cEntries;
5867 while (iIf-- > 0)
5868 {
5869 pNetwork->MacTab.paEntries[iIf].fActive = false;
5870 pNetwork->MacTab.paEntries[iIf].pIf->fActive = false;
5871 }
5872
5873 pNetwork->MacTab.fHostActive = false;
5874 pNetwork->MacTab.fWireActive = false;
5875
5876 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5877
5878 /* Wait for all the interfaces to quiesce. (Interfaces cannot be
5879 removed / added since we're holding the big lock.) */
5880 if (pTrunk)
5881 intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
5882
5883 iIf = pNetwork->MacTab.cEntries;
5884 while (iIf-- > 0)
5885 intnetR0BusyWait(pNetwork, &pNetwork->MacTab.paEntries[iIf].pIf->cBusy);
5886
5887 /* Orphan the interfaces (not trunk). Don't bother with calling
5888 pfnDisconnectInterface here since the networking is going away. */
5889 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5890 while ((iIf = pNetwork->MacTab.cEntries) > 0)
5891 {
5892 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf - 1].pIf;
5893 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5894
5895 intnetR0BusyWait(pNetwork, &pIf->cBusy);
5896
5897 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5898 if ( iIf == pNetwork->MacTab.cEntries /* paranoia */
5899 && pIf->cBusy)
5900 {
5901 pIf->pNetwork = NULL;
5902 pNetwork->MacTab.cEntries--;
5903 }
5904 }
5905
5906 /*
5907 * Zap the trunk pointer while we still own the spinlock, destroy the
5908 * trunk after we've left it. Note that this might take a while...
5909 */
5910 pNetwork->MacTab.pTrunk = NULL;
5911
5912 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5913
5914 if (pTrunk)
5915 intnetR0TrunkIfDestroy(pTrunk, pNetwork);
5916
5917 /*
5918 * Unlink the network.
5919 * Note that it needn't be in the list if we failed during creation.
5920 */
5921 PINTNETNETWORK pPrev = pIntNet->pNetworks;
5922 if (pPrev == pNetwork)
5923 pIntNet->pNetworks = pNetwork->pNext;
5924 else
5925 {
5926 for (; pPrev; pPrev = pPrev->pNext)
5927 if (pPrev->pNext == pNetwork)
5928 {
5929 pPrev->pNext = pNetwork->pNext;
5930 break;
5931 }
5932 }
5933 pNetwork->pNext = NULL;
5934 pNetwork->pvObj = NULL;
5935
5936 /*
5937 * Free resources.
5938 */
5939 RTSemEventDestroy(pNetwork->hEvtBusyIf);
5940 pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
5941 RTSpinlockDestroy(pNetwork->hAddrSpinlock);
5942 pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
5943 RTMemFree(pNetwork->MacTab.paEntries);
5944 pNetwork->MacTab.paEntries = NULL;
5945 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
5946 intnetR0IfAddrCacheDestroy(&pNetwork->aAddrBlacklist[i]);
5947 RTMemFree(pNetwork);
5948
5949 /* Release the create/destroy sem. */
5950 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
5951}
5952
5953
5954/**
5955 * Checks if the open network flags are compatible.
5956 *
5957 * @returns VBox status code.
5958 * @param pNetwork The network.
5959 * @param fFlags The open network flags.
5960 */
5961static int intnetR0CheckOpenNetworkFlags(PINTNETNETWORK pNetwork, uint32_t fFlags)
5962{
5963 uint32_t const fNetFlags = pNetwork->fFlags;
5964
5965 if ( (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
5966 ^ (fNetFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE))
5967 return VERR_INTNET_INCOMPATIBLE_FLAGS;
5968
5969 if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_EXACT)
5970 {
5971 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
5972 if ( (fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair)
5973 && (fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair)
5974 != (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair) )
5975 return VERR_INTNET_INCOMPATIBLE_FLAGS;
5976 }
5977
5978 if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
5979 {
5980 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
5981 if ( (fFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
5982 && !(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
5983 && (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed) )
5984 return VERR_INTNET_INCOMPATIBLE_FLAGS;
5985 }
5986
5987 return VINF_SUCCESS;
5988}
5989
5990
5991/**
5992 * Adapts flag changes on network opening.
5993 *
5994 * @returns VBox status code.
5995 * @param pNetwork The network.
5996 * @param fFlags The open network flags.
5997 */
5998static int intnetR0AdaptOpenNetworkFlags(PINTNETNETWORK pNetwork, uint32_t fFlags)
5999{
6000 /*
6001 * Upgrade the minimum policy flags.
6002 */
6003 uint32_t fNetMinFlags = pNetwork->fMinFlags;
6004 Assert(!(fNetMinFlags & INTNET_OPEN_FLAGS_RELAXED_MASK));
6005 if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
6006 {
6007 fNetMinFlags |= fFlags & INTNET_OPEN_FLAGS_STRICT_MASK;
6008 if (fNetMinFlags != pNetwork->fMinFlags)
6009 {
6010 LogRel(("INTNET: %s - min flags changed %#x -> %#x\n", pNetwork->szName, pNetwork->fMinFlags, fNetMinFlags));
6011 pNetwork->fMinFlags = fNetMinFlags;
6012 }
6013 }
6014
6015 /*
6016 * Calculate the new network flags.
6017 * (Depends on fNetMinFlags being recalculated first.)
6018 */
6019 uint32_t fNetFlags = pNetwork->fFlags;
6020
6021 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6022 {
6023 Assert(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair);
6024 Assert(!(fNetMinFlags & g_afIntNetOpenNetworkNetFlags[i].fRelaxed));
6025
6026 if (!(fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair))
6027 continue;
6028 if (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed)
6029 continue;
6030
6031 if ( (fNetMinFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
6032 || (fFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive) )
6033 {
6034 fNetFlags &= ~g_afIntNetOpenNetworkNetFlags[i].fPair;
6035 fNetFlags |= g_afIntNetOpenNetworkNetFlags[i].fRestrictive;
6036 }
6037 else if (!(fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES))
6038 {
6039 fNetFlags &= ~g_afIntNetOpenNetworkNetFlags[i].fPair;
6040 fNetFlags |= g_afIntNetOpenNetworkNetFlags[i].fRelaxed;
6041 }
6042 }
6043
6044 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6045 {
6046 Assert(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair);
6047 fNetFlags |= fFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed;
6048 }
6049
6050 /*
6051 * Apply the flags if they changed.
6052 */
6053 uint32_t const fOldNetFlags = pNetwork->fFlags;
6054 if (fOldNetFlags != fNetFlags)
6055 {
6056 LogRel(("INTNET: %s - flags changed %#x -> %#x\n", pNetwork->szName, fOldNetFlags, fNetFlags));
6057
6058 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
6059
6060 pNetwork->fFlags = fNetFlags;
6061
6062 /* Recalculate some derived switcher variables. */
6063 bool fActiveTrunk = pNetwork->MacTab.pTrunk
6064 && pNetwork->cActiveIFs > 0;
6065 pNetwork->MacTab.fHostActive = fActiveTrunk
6066 && (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
6067 pNetwork->MacTab.fHostPromiscuousEff = ( pNetwork->MacTab.fHostPromiscuousReal
6068 || (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE))
6069 && (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
6070
6071 pNetwork->MacTab.fWireActive = fActiveTrunk
6072 && (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
6073 pNetwork->MacTab.fWirePromiscuousReal= RT_BOOL(fNetFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE);
6074 pNetwork->MacTab.fWirePromiscuousEff = pNetwork->MacTab.fWirePromiscuousReal
6075 && (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE);
6076
6077 if ((fOldNetFlags ^ fNetFlags) & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS)
6078 {
6079 pNetwork->MacTab.cPromiscuousEntries = 0;
6080 pNetwork->MacTab.cPromiscuousNoTrunkEntries = 0;
6081
6082 uint32_t iIf = pNetwork->MacTab.cEntries;
6083 while (iIf-- > 0)
6084 {
6085 PINTNETMACTABENTRY pEntry = &pNetwork->MacTab.paEntries[iIf];
6086 PINTNETIF pIf2 = pEntry->pIf;
6087 if ( pIf2 /* paranoia */
6088 && pIf2->fPromiscuousReal)
6089 {
6090 bool fPromiscuousEff = (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS)
6091 && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW);
6092 pEntry->fPromiscuousEff = fPromiscuousEff;
6093 pEntry->fPromiscuousSeeTrunk = fPromiscuousEff
6094 && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK);
6095
6096 if (pEntry->fPromiscuousEff)
6097 {
6098 pNetwork->MacTab.cPromiscuousEntries++;
6099 if (!pEntry->fPromiscuousSeeTrunk)
6100 pNetwork->MacTab.cPromiscuousNoTrunkEntries++;
6101 }
6102 }
6103 }
6104 }
6105
6106 RTSpinlockRelease(pNetwork->hAddrSpinlock);
6107 }
6108
6109 return VINF_SUCCESS;
6110}
6111
6112
6113/**
6114 * Opens an existing network.
6115 *
6116 * The call must own the INTNET::hMtxCreateOpenDestroy.
6117 *
6118 * @returns VBox status code.
6119 * @param pIntNet The instance data.
6120 * @param pSession The current session.
6121 * @param pszNetwork The network name. This has a valid length.
6122 * @param enmTrunkType The trunk type.
6123 * @param pszTrunk The trunk name. Its meaning is specific to the type.
6124 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
6125 * @param ppNetwork Where to store the pointer to the network on success.
6126 */
6127static int intnetR0OpenNetwork(PINTNET pIntNet, PSUPDRVSESSION pSession, const char *pszNetwork, INTNETTRUNKTYPE enmTrunkType,
6128 const char *pszTrunk, uint32_t fFlags, PINTNETNETWORK *ppNetwork)
6129{
6130 LogFlow(("intnetR0OpenNetwork: pIntNet=%p pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x ppNetwork=%p\n",
6131 pIntNet, pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, ppNetwork));
6132
6133 /* just pro forma validation, the caller is internal. */
6134 AssertPtr(pIntNet);
6135 AssertPtr(pSession);
6136 AssertPtr(pszNetwork);
6137 Assert(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End);
6138 AssertPtr(pszTrunk);
6139 Assert(!(fFlags & ~INTNET_OPEN_FLAGS_MASK));
6140 AssertPtr(ppNetwork);
6141 *ppNetwork = NULL;
6142
6143 /*
6144 * Search networks by name.
6145 */
6146 PINTNETNETWORK pCur;
6147 uint8_t cchName = (uint8_t)strlen(pszNetwork);
6148 Assert(cchName && cchName < sizeof(pCur->szName)); /* caller ensures this */
6149
6150 pCur = pIntNet->pNetworks;
6151 while (pCur)
6152 {
6153 if ( pCur->cchName == cchName
6154 && !memcmp(pCur->szName, pszNetwork, cchName))
6155 {
6156 /*
6157 * Found the network, now check that we have the same ideas
6158 * about the trunk setup and security.
6159 */
6160 int rc;
6161 if ( enmTrunkType == kIntNetTrunkType_WhateverNone
6162#ifdef VBOX_WITH_NAT_SERVICE
6163 || enmTrunkType == kIntNetTrunkType_SrvNat /* @todo: what does it mean */
6164#endif
6165 || ( pCur->enmTrunkType == enmTrunkType
6166 && !strcmp(pCur->szTrunk, pszTrunk)))
6167 {
6168 rc = intnetR0CheckOpenNetworkFlags(pCur, fFlags);
6169 if (RT_SUCCESS(rc))
6170 {
6171 /*
6172 * Increment the reference and check that the session
6173 * can access this network.
6174 */
6175 rc = SUPR0ObjAddRef(pCur->pvObj, pSession);
6176 if (RT_SUCCESS(rc))
6177 {
6178 if (pCur->fFlags & INTNET_OPEN_FLAGS_ACCESS_RESTRICTED)
6179 rc = SUPR0ObjVerifyAccess(pCur->pvObj, pSession, pCur->szName);
6180 if (RT_SUCCESS(rc))
6181 *ppNetwork = pCur;
6182 else
6183 SUPR0ObjRelease(pCur->pvObj, pSession);
6184 }
6185 else if (rc == VERR_WRONG_ORDER)
6186 rc = VERR_NOT_FOUND; /* destruction race, pretend the other isn't there. */
6187 }
6188 }
6189 else
6190 {
6191 rc = VERR_INTNET_INCOMPATIBLE_TRUNK;
6192 LogRel(("intnetR0OpenNetwork failed. rc=%Rrc pCur->szTrunk=%s pszTrunk=%s pCur->enmTrunkType=%d enmTrunkType=%d\n",
6193 rc, pCur->szTrunk, pszTrunk, pCur->enmTrunkType, enmTrunkType));
6194 }
6195
6196 LogFlow(("intnetR0OpenNetwork: returns %Rrc *ppNetwork=%p\n", rc, *ppNetwork));
6197 return rc;
6198 }
6199
6200 pCur = pCur->pNext;
6201 }
6202
6203 LogFlow(("intnetR0OpenNetwork: returns VERR_NOT_FOUND\n"));
6204 return VERR_NOT_FOUND;
6205}
6206
6207
6208/**
6209 * Creates a new network.
6210 *
6211 * The call must own the INTNET::hMtxCreateOpenDestroy and has already attempted
6212 * opening the network and found it to be non-existing.
6213 *
6214 * @returns VBox status code.
6215 * @param pIntNet The instance data.
6216 * @param pSession The session handle.
6217 * @param pszNetwork The name of the network. This must be at least one character long and no longer
6218 * than the INTNETNETWORK::szName.
6219 * @param enmTrunkType The trunk type.
6220 * @param pszTrunk The trunk name. Its meaning is specific to the type.
6221 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
6222 * @param ppNetwork Where to store the network. In the case of failure
6223 * whatever is returned here should be dereferenced
6224 * outside the INTNET::hMtxCreateOpenDestroy.
6225 */
6226static int intnetR0CreateNetwork(PINTNET pIntNet, PSUPDRVSESSION pSession, const char *pszNetwork, INTNETTRUNKTYPE enmTrunkType,
6227 const char *pszTrunk, uint32_t fFlags, PINTNETNETWORK *ppNetwork)
6228{
6229 LogFlow(("intnetR0CreateNetwork: pIntNet=%p pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x ppNetwork=%p\n",
6230 pIntNet, pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, ppNetwork));
6231
6232 /* just pro forma validation, the caller is internal. */
6233 AssertPtr(pIntNet);
6234 AssertPtr(pSession);
6235 AssertPtr(pszNetwork);
6236 Assert(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End);
6237 AssertPtr(pszTrunk);
6238 Assert(!(fFlags & ~INTNET_OPEN_FLAGS_MASK));
6239 AssertPtr(ppNetwork);
6240
6241 *ppNetwork = NULL;
6242
6243 /*
6244 * Adjust the flags with defaults for the network policies.
6245 * Note: Main restricts promiscuous mode on the per interface level.
6246 */
6247 fFlags &= ~( INTNET_OPEN_FLAGS_IF_FIXED
6248 | INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW
6249 | INTNET_OPEN_FLAGS_IF_PROMISC_DENY
6250 | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK
6251 | INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK
6252 | INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES
6253 | INTNET_OPEN_FLAGS_REQUIRE_EXACT);
6254 uint32_t fDefFlags = INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS
6255 | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST
6256 | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE
6257 | INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED
6258 | INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE
6259 | INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED
6260 | INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE;
6261 if ( enmTrunkType == kIntNetTrunkType_WhateverNone
6262#ifdef VBOX_WITH_NAT_SERVICE
6263 || enmTrunkType == kIntNetTrunkType_SrvNat /* simialar security */
6264#endif
6265 || enmTrunkType == kIntNetTrunkType_None)
6266 fDefFlags |= INTNET_OPEN_FLAGS_ACCESS_RESTRICTED;
6267 else
6268 fDefFlags |= INTNET_OPEN_FLAGS_ACCESS_PUBLIC;
6269 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6270 if (!(fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair))
6271 fFlags |= g_afIntNetOpenNetworkNetFlags[i].fPair & fDefFlags;
6272
6273 /*
6274 * Allocate and initialize.
6275 */
6276 size_t cb = sizeof(INTNETNETWORK);
6277 if (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
6278 cb += INTNETNETWORK_TMP_SIZE + 64;
6279 PINTNETNETWORK pNetwork = (PINTNETNETWORK)RTMemAllocZ(cb);
6280 if (!pNetwork)
6281 return VERR_NO_MEMORY;
6282 //pNetwork->pNext = NULL;
6283 //pNetwork->pIfs = NULL;
6284 pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
6285 pNetwork->MacTab.cEntries = 0;
6286 pNetwork->MacTab.cEntriesAllocated = INTNET_GROW_DSTTAB_SIZE;
6287 //pNetwork->MacTab.cPromiscuousEntries = 0;
6288 //pNetwork->MacTab.cPromiscuousNoTrunkEntries = 0;
6289 pNetwork->MacTab.paEntries = NULL;
6290 pNetwork->MacTab.fHostPromiscuousReal = false;
6291 pNetwork->MacTab.fHostPromiscuousEff = false;
6292 pNetwork->MacTab.fHostActive = false;
6293 pNetwork->MacTab.fWirePromiscuousReal = false;
6294 pNetwork->MacTab.fWirePromiscuousEff = false;
6295 pNetwork->MacTab.fWireActive = false;
6296 pNetwork->MacTab.pTrunk = NULL;
6297 pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
6298 pNetwork->pIntNet = pIntNet;
6299 //pNetwork->pvObj = NULL;
6300 if (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
6301 pNetwork->pbTmp = RT_ALIGN_PT(pNetwork + 1, 64, uint8_t *);
6302 //else
6303 // pNetwork->pbTmp = NULL;
6304 pNetwork->fFlags = fFlags;
6305 //pNetwork->fMinFlags = 0;
6306 //pNetwork->cActiveIFs = 0;
6307 size_t cchName = strlen(pszNetwork);
6308 pNetwork->cchName = (uint8_t)cchName;
6309 Assert(cchName && cchName < sizeof(pNetwork->szName)); /* caller's responsibility. */
6310 memcpy(pNetwork->szName, pszNetwork, cchName); /* '\0' at courtesy of alloc. */
6311 pNetwork->enmTrunkType = enmTrunkType;
6312 Assert(strlen(pszTrunk) < sizeof(pNetwork->szTrunk)); /* caller's responsibility. */
6313 strcpy(pNetwork->szTrunk, pszTrunk);
6314
6315 /*
6316 * Create the semaphore, spinlock and allocate the interface table.
6317 */
6318 int rc = RTSemEventCreate(&pNetwork->hEvtBusyIf);
6319 if (RT_SUCCESS(rc))
6320 rc = RTSpinlockCreate(&pNetwork->hAddrSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hAddrSpinlock");
6321 if (RT_SUCCESS(rc))
6322 {
6323 pNetwork->MacTab.paEntries = (PINTNETMACTABENTRY)RTMemAlloc(sizeof(INTNETMACTABENTRY) * pNetwork->MacTab.cEntriesAllocated);
6324 if (!pNetwork->MacTab.paEntries)
6325 rc = VERR_NO_MEMORY;
6326 }
6327 if (RT_SUCCESS(rc))
6328 {
6329 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End && RT_SUCCESS(rc); i++)
6330 rc = intnetR0IfAddrCacheInit(&pNetwork->aAddrBlacklist[i], (INTNETADDRTYPE)i,
6331 !!(pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE));
6332 }
6333 if (RT_SUCCESS(rc))
6334 {
6335 /*
6336 * Register the object in the current session and link it into the network list.
6337 */
6338 pNetwork->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_INTERNAL_NETWORK, intnetR0NetworkDestruct, pNetwork, pIntNet);
6339 if (pNetwork->pvObj)
6340 {
6341 pNetwork->pNext = pIntNet->pNetworks;
6342 pIntNet->pNetworks = pNetwork;
6343
6344 /*
6345 * Check if the current session is actually allowed to create and
6346 * open the network. It is possible to implement network name
6347 * based policies and these must be checked now. SUPR0ObjRegister
6348 * does no such checks.
6349 */
6350 rc = SUPR0ObjVerifyAccess(pNetwork->pvObj, pSession, pNetwork->szName);
6351 if (RT_SUCCESS(rc))
6352 {
6353 /*
6354 * Connect the trunk.
6355 */
6356 rc = intnetR0NetworkCreateTrunkIf(pNetwork, pSession);
6357 if (RT_SUCCESS(rc))
6358 {
6359 *ppNetwork = pNetwork;
6360 LogFlow(("intnetR0CreateNetwork: returns VINF_SUCCESS *ppNetwork=%p\n", pNetwork));
6361 return VINF_SUCCESS;
6362 }
6363 }
6364
6365 SUPR0ObjRelease(pNetwork->pvObj, pSession);
6366 LogFlow(("intnetR0CreateNetwork: returns %Rrc\n", rc));
6367 return rc;
6368 }
6369
6370 /* cleanup */
6371 rc = VERR_NO_MEMORY;
6372 }
6373
6374 RTSemEventDestroy(pNetwork->hEvtBusyIf);
6375 pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
6376 RTSpinlockDestroy(pNetwork->hAddrSpinlock);
6377 pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
6378 RTMemFree(pNetwork->MacTab.paEntries);
6379 pNetwork->MacTab.paEntries = NULL;
6380 RTMemFree(pNetwork);
6381
6382 LogFlow(("intnetR0CreateNetwork: returns %Rrc\n", rc));
6383 return rc;
6384}
6385
6386
6387/**
6388 * Opens a network interface and connects it to the specified network.
6389 *
6390 * @returns VBox status code.
6391 * @param pSession The session handle.
6392 * @param pszNetwork The network name.
6393 * @param enmTrunkType The trunk type.
6394 * @param pszTrunk The trunk name. Its meaning is specific to the type.
6395 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
6396 * @param fRestrictAccess Whether new participants should be subjected to access check or not.
6397 * @param cbSend The send buffer size.
6398 * @param cbRecv The receive buffer size.
6399 * @param phIf Where to store the handle to the network interface.
6400 */
6401INTNETR0DECL(int) IntNetR0Open(PSUPDRVSESSION pSession, const char *pszNetwork,
6402 INTNETTRUNKTYPE enmTrunkType, const char *pszTrunk, uint32_t fFlags,
6403 uint32_t cbSend, uint32_t cbRecv, PINTNETIFHANDLE phIf)
6404{
6405 LogFlow(("IntNetR0Open: pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x cbSend=%u cbRecv=%u phIf=%p\n",
6406 pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, cbSend, cbRecv, phIf));
6407
6408 /*
6409 * Validate input.
6410 */
6411 PINTNET pIntNet = g_pIntNet;
6412 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
6413 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
6414
6415 AssertPtrReturn(pszNetwork, VERR_INVALID_PARAMETER);
6416 const char *pszNetworkEnd = RTStrEnd(pszNetwork, INTNET_MAX_NETWORK_NAME);
6417 AssertReturn(pszNetworkEnd, VERR_INVALID_PARAMETER);
6418 size_t cchNetwork = pszNetworkEnd - pszNetwork;
6419 AssertReturn(cchNetwork, VERR_INVALID_PARAMETER);
6420
6421 if (pszTrunk)
6422 {
6423 AssertPtrReturn(pszTrunk, VERR_INVALID_PARAMETER);
6424 const char *pszTrunkEnd = RTStrEnd(pszTrunk, INTNET_MAX_TRUNK_NAME);
6425 AssertReturn(pszTrunkEnd, VERR_INVALID_PARAMETER);
6426 }
6427 else
6428 pszTrunk = "";
6429
6430 AssertMsgReturn(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End,
6431 ("%d\n", enmTrunkType), VERR_INVALID_PARAMETER);
6432 switch (enmTrunkType)
6433 {
6434 case kIntNetTrunkType_None:
6435 case kIntNetTrunkType_WhateverNone:
6436#ifdef VBOX_WITH_NAT_SERVICE
6437 case kIntNetTrunkType_SrvNat:
6438#endif
6439 if (*pszTrunk)
6440 return VERR_INVALID_PARAMETER;
6441 break;
6442
6443 case kIntNetTrunkType_NetFlt:
6444 case kIntNetTrunkType_NetAdp:
6445 if (!*pszTrunk)
6446 return VERR_INVALID_PARAMETER;
6447 break;
6448
6449 default:
6450 return VERR_NOT_IMPLEMENTED;
6451 }
6452
6453 AssertMsgReturn(!(fFlags & ~INTNET_OPEN_FLAGS_MASK), ("%#x\n", fFlags), VERR_INVALID_PARAMETER);
6454 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6455 AssertMsgReturn((fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair) != g_afIntNetOpenNetworkNetFlags[i].fPair,
6456 ("%#x (%#x)\n", fFlags, g_afIntNetOpenNetworkNetFlags[i].fPair), VERR_INVALID_PARAMETER);
6457 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkIfFlags); i++)
6458 AssertMsgReturn((fFlags & g_afIntNetOpenNetworkIfFlags[i].fPair) != g_afIntNetOpenNetworkIfFlags[i].fPair,
6459 ("%#x (%#x)\n", fFlags, g_afIntNetOpenNetworkIfFlags[i].fPair), VERR_INVALID_PARAMETER);
6460 AssertPtrReturn(phIf, VERR_INVALID_PARAMETER);
6461
6462 /*
6463 * Acquire the mutex to serialize open/create/close.
6464 */
6465 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
6466 if (RT_FAILURE(rc))
6467 return rc;
6468
6469 /*
6470 * Try open / create the network and create an interface on it for the
6471 * caller to use.
6472 */
6473 PINTNETNETWORK pNetwork = NULL;
6474 rc = intnetR0OpenNetwork(pIntNet, pSession, pszNetwork, enmTrunkType, pszTrunk, fFlags, &pNetwork);
6475 if (RT_SUCCESS(rc))
6476 {
6477 rc = intnetR0NetworkCreateIf(pNetwork, pSession, cbSend, cbRecv, fFlags, phIf);
6478 if (RT_SUCCESS(rc))
6479 {
6480 intnetR0AdaptOpenNetworkFlags(pNetwork, fFlags);
6481 rc = VINF_ALREADY_INITIALIZED;
6482 }
6483 else
6484 SUPR0ObjRelease(pNetwork->pvObj, pSession);
6485 }
6486 else if (rc == VERR_NOT_FOUND)
6487 {
6488 rc = intnetR0CreateNetwork(pIntNet, pSession, pszNetwork, enmTrunkType, pszTrunk, fFlags, &pNetwork);
6489 if (RT_SUCCESS(rc))
6490 {
6491 rc = intnetR0NetworkCreateIf(pNetwork, pSession, cbSend, cbRecv, fFlags, phIf);
6492 if (RT_FAILURE(rc))
6493 SUPR0ObjRelease(pNetwork->pvObj, pSession);
6494 }
6495 }
6496
6497 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
6498 LogFlow(("IntNetR0Open: return %Rrc *phIf=%RX32\n", rc, *phIf));
6499 return rc;
6500}
6501
6502
6503/**
6504 * VMMR0 request wrapper for IntNetR0Open.
6505 *
6506 * @returns see GMMR0MapUnmapChunk.
6507 * @param pSession The caller's session.
6508 * @param pReq The request packet.
6509 */
6510INTNETR0DECL(int) IntNetR0OpenReq(PSUPDRVSESSION pSession, PINTNETOPENREQ pReq)
6511{
6512 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
6513 return VERR_INVALID_PARAMETER;
6514 return IntNetR0Open(pSession, &pReq->szNetwork[0], pReq->enmTrunkType, pReq->szTrunk,
6515 pReq->fFlags, pReq->cbSend, pReq->cbRecv, &pReq->hIf);
6516}
6517
6518
6519/**
6520 * Count the internal networks.
6521 *
6522 * This is mainly for providing the testcase with some introspection to validate
6523 * behavior when closing interfaces.
6524 *
6525 * @returns The number of networks.
6526 */
6527INTNETR0DECL(uint32_t) IntNetR0GetNetworkCount(void)
6528{
6529 /*
6530 * Grab the instance.
6531 */
6532 PINTNET pIntNet = g_pIntNet;
6533 if (!pIntNet)
6534 return 0;
6535 AssertPtrReturn(pIntNet, 0);
6536 AssertReturn(pIntNet->u32Magic == INTNET_MAGIC, 0);
6537
6538 /*
6539 * Grab the mutex and count the networks.
6540 */
6541 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
6542 if (RT_FAILURE(rc))
6543 return 0;
6544
6545 uint32_t cNetworks = 0;
6546 for (PINTNETNETWORK pCur = pIntNet->pNetworks; pCur; pCur = pCur->pNext)
6547 cNetworks++;
6548
6549 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
6550
6551 return cNetworks;
6552}
6553
6554
6555
6556/**
6557 * Destroys an instance of the Ring-0 internal networking service.
6558 */
6559INTNETR0DECL(void) IntNetR0Term(void)
6560{
6561 LogFlow(("IntNetR0Term:\n"));
6562
6563 /*
6564 * Zap the global pointer and validate it.
6565 */
6566 PINTNET pIntNet = g_pIntNet;
6567 g_pIntNet = NULL;
6568 if (!pIntNet)
6569 return;
6570 AssertPtrReturnVoid(pIntNet);
6571 AssertReturnVoid(pIntNet->u32Magic == INTNET_MAGIC);
6572
6573 /*
6574 * There is not supposed to be any networks hanging around at this time.
6575 */
6576 AssertReturnVoid(ASMAtomicCmpXchgU32(&pIntNet->u32Magic, ~INTNET_MAGIC, INTNET_MAGIC));
6577 Assert(pIntNet->pNetworks == NULL);
6578 if (pIntNet->hMtxCreateOpenDestroy != NIL_RTSEMMUTEX)
6579 {
6580 RTSemMutexDestroy(pIntNet->hMtxCreateOpenDestroy);
6581 pIntNet->hMtxCreateOpenDestroy = NIL_RTSEMMUTEX;
6582 }
6583 if (pIntNet->hHtIfs != NIL_RTHANDLETABLE)
6584 {
6585 /** @todo does it make sense to have a deleter here? */
6586 RTHandleTableDestroy(pIntNet->hHtIfs, NULL, NULL);
6587 pIntNet->hHtIfs = NIL_RTHANDLETABLE;
6588 }
6589
6590 RTMemFree(pIntNet);
6591}
6592
6593
6594/**
6595 * Initializes the internal network ring-0 service.
6596 *
6597 * @returns VBox status code.
6598 */
6599INTNETR0DECL(int) IntNetR0Init(void)
6600{
6601 LogFlow(("IntNetR0Init:\n"));
6602 int rc = VERR_NO_MEMORY;
6603 PINTNET pIntNet = (PINTNET)RTMemAllocZ(sizeof(*pIntNet));
6604 if (pIntNet)
6605 {
6606 //pIntNet->pNetworks = NULL;
6607
6608 rc = RTSemMutexCreate(&pIntNet->hMtxCreateOpenDestroy);
6609 if (RT_SUCCESS(rc))
6610 {
6611 rc = RTHandleTableCreateEx(&pIntNet->hHtIfs, RTHANDLETABLE_FLAGS_LOCKED | RTHANDLETABLE_FLAGS_CONTEXT,
6612 UINT32_C(0x8ffe0000), 4096, intnetR0IfRetainHandle, NULL);
6613 if (RT_SUCCESS(rc))
6614 {
6615 pIntNet->u32Magic = INTNET_MAGIC;
6616 g_pIntNet = pIntNet;
6617 LogFlow(("IntNetR0Init: returns VINF_SUCCESS pIntNet=%p\n", pIntNet));
6618 return VINF_SUCCESS;
6619 }
6620
6621 RTSemMutexDestroy(pIntNet->hMtxCreateOpenDestroy);
6622 }
6623 RTMemFree(pIntNet);
6624 }
6625 LogFlow(("IntNetR0Init: returns %Rrc\n", rc));
6626 return rc;
6627}
6628
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
Contact | Privacy policy | Terms of Use