VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/SrvIntNetR0.cpp@ 103068

Last change on this file since 103068 was 103025, checked in by vboxsync, 8 months ago

*: Replaced some ASMMemZero32 uses with RT_BZERO. bugref:10588

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 251.2 KB
Line 
1/* $Id: SrvIntNetR0.cpp 103025 2024-01-24 14:21:13Z vboxsync $ */
2/** @file
3 * Internal networking - The ring 0 service.
4 *
5 * @remarks No lazy code changes. If you don't understand exactly what you're
6 * doing, get an understanding or forget it.
7 * All changes shall be reviewed by bird before commit. If not around,
8 * email and let Frank and/or Klaus OK the changes before committing.
9 */
10
11/*
12 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
13 *
14 * This file is part of VirtualBox base platform packages, as
15 * available from https://www.virtualbox.org.
16 *
17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public License
19 * as published by the Free Software Foundation, in version 3 of the
20 * License.
21 *
22 * This program is distributed in the hope that it will be useful, but
23 * WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 * General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, see <https://www.gnu.org/licenses>.
29 *
30 * SPDX-License-Identifier: GPL-3.0-only
31 */
32
33
34/*********************************************************************************************************************************
35* Header Files *
36*********************************************************************************************************************************/
37#define LOG_GROUP LOG_GROUP_SRV_INTNET
38#include <VBox/intnet.h>
39#include <VBox/intnetinline.h>
40#include <VBox/vmm/pdmnetinline.h>
41#include <VBox/sup.h>
42#include <VBox/vmm/pdm.h>
43#include <VBox/log.h>
44
45#include <iprt/asm.h>
46#include <iprt/assert.h>
47#include <iprt/handletable.h>
48#include <iprt/mp.h>
49#include <iprt/mem.h>
50#include <iprt/net.h>
51#include <iprt/semaphore.h>
52#include <iprt/spinlock.h>
53#include <iprt/string.h>
54#include <iprt/thread.h>
55#include <iprt/time.h>
56
57
58/*********************************************************************************************************************************
59* Defined Constants And Macros *
60*********************************************************************************************************************************/
/** @def INTNET_WITH_DHCP_SNOOPING
 * Enables DHCP snooping when in shared-mac-on-the-wire mode. */
#define INTNET_WITH_DHCP_SNOOPING

/** The maximum number of interfaces in a network. */
#define INTNET_MAX_IFS                  (1023 + 1 + 16)

/** The number of entries the destination tables are grown by.
 * The 16 entry variant is compiled out; the active value is 1. */
#if 0
# define INTNET_GROW_DSTTAB_SIZE        16
#else
# define INTNET_GROW_DSTTAB_SIZE        1
#endif

/** The wakeup bit in the INTNETIF::cBusy and INTNETTRUNKIF::cBusy counters. */
#define INTNET_BUSY_WAKEUP_MASK         RT_BIT_32(30)
77
78
79/*********************************************************************************************************************************
80* Structures and Typedefs *
81*********************************************************************************************************************************/
82/**
83 * MAC address lookup table entry.
84 */
85typedef struct INTNETMACTABENTRY
86{
87 /** The MAC address of this entry. */
88 RTMAC MacAddr;
89 /** Is it is effectively promiscuous mode. */
90 bool fPromiscuousEff;
91 /** Is it promiscuous and should it see unrelated trunk traffic. */
92 bool fPromiscuousSeeTrunk;
93 /** Is it active.
94 * We ignore the entry if this is clear and may end up sending packets addressed
95 * to this interface onto the trunk. The reasoning for this is that this could
96 * be the interface of a VM that just has been teleported to a different host. */
97 bool fActive;
98 /** Pointer to the network interface. */
99 struct INTNETIF *pIf;
100} INTNETMACTABENTRY;
101/** Pointer to a MAC address lookup table entry. */
102typedef INTNETMACTABENTRY *PINTNETMACTABENTRY;
103
104/**
105 * MAC address lookup table.
106 *
107 * @todo Having this in a separate structure didn't work out as well as it
108 * should. Consider merging it into INTNETNETWORK.
109 */
110typedef struct INTNETMACTAB
111{
112 /** The current number of entries. */
113 uint32_t cEntries;
114 /** The number of entries we've allocated space for. */
115 uint32_t cEntriesAllocated;
116 /** Table entries. */
117 PINTNETMACTABENTRY paEntries;
118
119 /** The number of interface entries currently in promicuous mode. */
120 uint32_t cPromiscuousEntries;
121 /** The number of interface entries currently in promicuous mode that
122 * shall not see unrelated trunk traffic. */
123 uint32_t cPromiscuousNoTrunkEntries;
124
125 /** The host MAC address (reported). */
126 RTMAC HostMac;
127 /** The effective host promiscuous setting (reported). */
128 bool fHostPromiscuousEff;
129 /** The real host promiscuous setting (reported). */
130 bool fHostPromiscuousReal;
131 /** Whether the host is active. */
132 bool fHostActive;
133
134 /** Whether the wire is promiscuous (config). */
135 bool fWirePromiscuousEff;
136 /** Whether the wire is promiscuous (config).
137 * (Shadows INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE in
138 * INTNETNETWORK::fFlags.) */
139 bool fWirePromiscuousReal;
140 /** Whether the wire is active. */
141 bool fWireActive;
142
143 /** Pointer to the trunk interface. */
144 struct INTNETTRUNKIF *pTrunk;
145} INTNETMACTAB;
146/** Pointer to a MAC address . */
147typedef INTNETMACTAB *PINTNETMACTAB;
148
/**
 * A destination table.
 *
 * The aIfs member is declared with one element but is used as a variable
 * sized array, so the structure must keep it as its last member.
 */
typedef struct INTNETDSTTAB
{
    /** The trunk destinations. */
    uint32_t                fTrunkDst;
    /** The trunk interface (referenced) when fTrunkDst is non-zero. */
    struct INTNETTRUNKIF   *pTrunk;
    /** Number of destination interfaces in aIfs. */
    uint32_t                cIfs;
    /** The destination interfaces (referenced).  Variable sized array. */
    struct
    {
        /** The destination interface. */
        struct INTNETIF    *pIf;
        /** Set when the destination MAC address is to be replaced.
         * Used when sharing the MAC address with the host on the wire(less). */
        bool                fReplaceDstMac;
    } aIfs[1];
} INTNETDSTTAB;
/** Pointer to a destination table. */
typedef INTNETDSTTAB *PINTNETDSTTAB;
/** Pointer to a const destination table. */
typedef INTNETDSTTAB const *PCINTNETDSTTAB;
174
175/**
176 * Address and type.
177 */
178typedef struct INTNETADDR
179{
180 /** The address type. */
181 INTNETADDRTYPE enmType;
182 /** The address. */
183 RTNETADDRU Addr;
184} INTNETADDR;
185/** Pointer to an address. */
186typedef INTNETADDR *PINTNETADDR;
187/** Pointer to a const address. */
188typedef INTNETADDR const *PCINTNETADDR;
189
190
/**
 * The address cache of one specific network layer.
 */
typedef struct INTNETADDRCACHE
{
    /** The table of addresses. */
    uint8_t            *pbEntries;
    /** Number of valid address entries in the table. */
    uint8_t             cEntries;
    /** Number of address entries allocated. */
    uint8_t             cEntriesAlloc;
    /** Size of an address. */
    uint8_t             cbAddress;
    /** Size of a table entry. */
    uint8_t             cbEntry;
} INTNETADDRCACHE;
/** Pointer to an address cache. */
typedef INTNETADDRCACHE *PINTNETADDRCACHE;
/** Pointer to a const address cache. */
typedef INTNETADDRCACHE const *PCINTNETADDRCACHE;
211
212
213/**
214 * A network interface.
215 *
216 * Unless explicitly stated, all members are protect by the network semaphore.
217 */
218typedef struct INTNETIF
219{
220 /** The MAC address.
221 * This is shadowed by INTNETMACTABENTRY::MacAddr. */
222 RTMAC MacAddr;
223 /** Set if the INTNET::MacAddr member has been explicitly set. */
224 bool fMacSet;
225 /** Tracks the desired promiscuous setting of the interface. */
226 bool fPromiscuousReal;
227 /** Whether the interface is active or not.
228 * This is shadowed by INTNETMACTABENTRY::fActive. */
229 bool fActive;
230 /** Whether someone has indicated that the end is nigh by means of IntNetR0IfAbortWait. */
231 bool volatile fNoMoreWaits;
232 /** The flags specified when opening this interface. */
233 uint32_t fOpenFlags;
234 /** Number of yields done to try make the interface read pending data.
235 * We will stop yielding when this reaches a threshold assuming that the VM is
236 * paused or that it simply isn't worth all the delay. It is cleared when a
237 * successful send has been done. */
238 uint32_t cYields;
239 /** Pointer to the current exchange buffer (ring-0). */
240 PINTNETBUF pIntBuf;
241 /** Pointer to ring-3 mapping of the current exchange buffer. */
242 R3PTRTYPE(PINTNETBUF) pIntBufR3;
243 /** Pointer to the default exchange buffer for the interface. */
244 PINTNETBUF pIntBufDefault;
245 /** Pointer to ring-3 mapping of the default exchange buffer. */
246 R3PTRTYPE(PINTNETBUF) pIntBufDefaultR3;
247#if !defined(VBOX_WITH_INTNET_SERVICE_IN_R3) || !defined(IN_RING3)
248 /** Event semaphore which a receiver/consumer thread will sleep on while
249 * waiting for data to arrive. */
250 RTSEMEVENT volatile hRecvEvent;
251 /** Number of threads sleeping on the event semaphore. */
252 uint32_t volatile cSleepers;
253#else
254 /** The callback to call when there is something to receive/consume. */
255 PFNINTNETIFRECVAVAIL pfnRecvAvail;
256 /** Opaque user data to pass to the receive avail callback (pfnRecvAvail). */
257 void *pvUserRecvAvail;
258#endif
259 /** The interface handle.
260 * When this is INTNET_HANDLE_INVALID a sleeper which is waking up
261 * should return with the appropriate error condition. */
262 INTNETIFHANDLE volatile hIf;
263 /** The native handle of the destructor thread. This is NIL_RTNATIVETHREAD when
264 * the object is valid and set when intnetR0IfDestruct is in progress. This is
265 * used to cover an unlikely (impossible?) race between SUPDRVSESSION cleanup
266 * and lingering threads waiting for recv or similar. */
267 RTNATIVETHREAD volatile hDestructorThread;
268 /** Pointer to the network this interface is connected to.
269 * This is protected by the INTNET::hMtxCreateOpenDestroy. */
270 struct INTNETNETWORK *pNetwork;
271 /** The session this interface is associated with. */
272 PSUPDRVSESSION pSession;
273 /** The SUPR0 object id. */
274 void *pvObj;
275 /** The network layer address cache. (Indexed by type, 0 entry isn't used.)
276 * This is protected by the address spinlock of the network. */
277 INTNETADDRCACHE aAddrCache[kIntNetAddrType_End];
278 /** Spinlock protecting the input (producer) side of the receive ring. */
279 RTSPINLOCK hRecvInSpinlock;
280 /** Busy count for tracking destination table references and active sends.
281 * Usually incremented while owning the switch table spinlock. The 30th bit
282 * is used to indicate wakeup. */
283 uint32_t volatile cBusy;
284 /** The preallocated destination table.
285 * This is NULL when it's in use as a precaution against unserialized
286 * transmitting. This is grown when new interfaces are added to the network. */
287 PINTNETDSTTAB volatile pDstTab;
288 /** Pointer to the trunk's per interface data. Can be NULL. */
289 void *pvIfData;
290 /** Header buffer for when we're carving GSO frames. */
291 uint8_t abGsoHdrs[256];
292} INTNETIF;
293/** Pointer to an internal network interface. */
294typedef INTNETIF *PINTNETIF;
295
296
297/**
298 * A trunk interface.
299 */
300typedef struct INTNETTRUNKIF
301{
302 /** The port interface we present to the component. */
303 INTNETTRUNKSWPORT SwitchPort;
304 /** The port interface we get from the component. */
305 PINTNETTRUNKIFPORT pIfPort;
306 /** Pointer to the network we're connect to.
307 * This may be NULL if we're orphaned? */
308 struct INTNETNETWORK *pNetwork;
309 /** The current MAC address for the interface. (reported)
310 * Updated while owning the switch table spinlock. */
311 RTMAC MacAddr;
312 /** Whether to supply physical addresses with the outbound SGs. (reported) */
313 bool fPhysSG;
314 /** Explicit alignment. */
315 bool fUnused;
316 /** Busy count for tracking destination table references and active sends.
317 * Usually incremented while owning the switch table spinlock. The 30th bit
318 * is used to indicate wakeup. */
319 uint32_t volatile cBusy;
320 /** Mask of destinations that pfnXmit cope with disabled preemption for. */
321 uint32_t fNoPreemptDsts;
322 /** The GSO capabilities of the wire destination. (reported) */
323 uint32_t fWireGsoCapabilites;
324 /** The GSO capabilities of the host destination. (reported)
325 * This is as bit map where each bit represents the GSO type with the same
326 * number. */
327 uint32_t fHostGsoCapabilites;
328 /** The destination table spinlock, interrupt safe.
329 * Protects apTaskDstTabs and apIntDstTabs. */
330 RTSPINLOCK hDstTabSpinlock;
331 /** The number of entries in apIntDstTabs. */
332 uint32_t cIntDstTabs;
333 /** The task time destination tables.
334 * @remarks intnetR0NetworkEnsureTabSpace and others ASSUMES this immediately
335 * precedes apIntDstTabs so that these two tables can be used as one
336 * contiguous one. */
337 PINTNETDSTTAB apTaskDstTabs[2];
338 /** The interrupt / disabled-preemption time destination tables.
339 * This is a variable sized array. */
340 PINTNETDSTTAB apIntDstTabs[1];
341} INTNETTRUNKIF;
342/** Pointer to a trunk interface. */
343typedef INTNETTRUNKIF *PINTNETTRUNKIF;
344
345/** Converts a pointer to INTNETTRUNKIF::SwitchPort to a PINTNETTRUNKIF. */
346#define INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort) ((PINTNETTRUNKIF)(pSwitchPort))
347
348
349/**
350 * Internal representation of a network.
351 */
352typedef struct INTNETNETWORK
353{
354 /** The Next network in the chain.
355 * This is protected by the INTNET::hMtxCreateOpenDestroy. */
356 struct INTNETNETWORK *pNext;
357
358 /** The spinlock protecting MacTab, aAddrBlacklist and INTNETIF::aAddrCache.
359 * Interrupt safe. */
360 RTSPINLOCK hAddrSpinlock;
361 /** MAC address table.
362 * This doubles as interface collection. */
363 INTNETMACTAB MacTab;
364
365 /** The network layer address cache. (Indexed by type, 0 entry isn't used.
366 * Contains host addresses. We don't let guests spoof them. */
367 INTNETADDRCACHE aAddrBlacklist[kIntNetAddrType_End];
368
369 /** Wait for an interface to stop being busy so it can be removed or have its
370 * destination table replaced. We have to wait upon this while owning the
371 * network mutex. Will only ever have one waiter because of the big mutex. */
372 RTSEMEVENT hEvtBusyIf;
373 /** Pointer to the instance data. */
374 struct INTNET *pIntNet;
375 /** The SUPR0 object id. */
376 void *pvObj;
377 /** The trunk reconnection system thread. The thread gets started at trunk
378 * disconnection. It tries to reconnect the trunk to the bridged filter instance.
379 * The thread erases this handle right before it terminates.
380 */
381 RTTHREAD hTrunkReconnectThread;
382 /** Trunk reconnection thread termination flag. */
383 bool volatile fTerminateReconnectThread;
384 /** Pointer to the temporary buffer that is used when snooping fragmented packets.
385 * This is allocated after this structure if we're sharing the MAC address with
386 * the host. The buffer is INTNETNETWORK_TMP_SIZE big and aligned on a 64-byte boundary. */
387 uint8_t *pbTmp;
388 /** Network creation flags (INTNET_OPEN_FLAGS_*). */
389 uint32_t fFlags;
390 /** Any restrictive policies required as a minimum by some interface.
391 * (INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES) */
392 uint32_t fMinFlags;
393 /** The number of active interfaces (excluding the trunk). */
394 uint32_t cActiveIFs;
395 /** The length of the network name. */
396 uint8_t cchName;
397 /** The network name. */
398 char szName[INTNET_MAX_NETWORK_NAME];
399 /** The trunk type. */
400 INTNETTRUNKTYPE enmTrunkType;
401 /** The trunk name. */
402 char szTrunk[INTNET_MAX_TRUNK_NAME];
403} INTNETNETWORK;
404/** Pointer to an internal network. */
405typedef INTNETNETWORK *PINTNETNETWORK;
406/** Pointer to a const internal network. */
407typedef const INTNETNETWORK *PCINTNETNETWORK;
408
409/** The size of the buffer INTNETNETWORK::pbTmp points at. */
410#define INTNETNETWORK_TMP_SIZE 2048
411
412
413/**
414 * Internal networking instance.
415 */
416typedef struct INTNET
417{
418 /** Magic number (INTNET_MAGIC). */
419 uint32_t volatile u32Magic;
420 /** Mutex protecting the creation, opening and destruction of both networks and
421 * interfaces. (This means all operations affecting the pNetworks list.) */
422 RTSEMMUTEX hMtxCreateOpenDestroy;
423 /** List of networks. Protected by INTNET::Spinlock. */
424 PINTNETNETWORK volatile pNetworks;
425 /** Handle table for the interfaces. */
426 RTHANDLETABLE hHtIfs;
427} INTNET;
428/** Pointer to an internal network ring-0 instance. */
429typedef struct INTNET *PINTNET;
430
431/** Magic number for the internal network instance data (Hayao Miyazaki). */
432#define INTNET_MAGIC UINT32_C(0x19410105)
433
434
435/*********************************************************************************************************************************
436* Global Variables *
437*********************************************************************************************************************************/
438/** Pointer to the internal network instance data. */
439static PINTNET volatile g_pIntNet = NULL;
440
441static const struct INTNETOPENNETWORKFLAGS
442{
443 uint32_t fRestrictive; /**< The restrictive flag (deny/disabled). */
444 uint32_t fRelaxed; /**< The relaxed flag (allow/enabled). */
445 uint32_t fFixed; /**< The config-fixed flag. */
446 uint32_t fPair; /**< The pair of restrictive and relaxed flags. */
447}
448/** Open network policy flags relating to the network. */
449g_afIntNetOpenNetworkNetFlags[] =
450{
451 { INTNET_OPEN_FLAGS_ACCESS_RESTRICTED, INTNET_OPEN_FLAGS_ACCESS_PUBLIC, INTNET_OPEN_FLAGS_ACCESS_FIXED, INTNET_OPEN_FLAGS_ACCESS_RESTRICTED | INTNET_OPEN_FLAGS_ACCESS_PUBLIC },
452 { INTNET_OPEN_FLAGS_PROMISC_DENY_CLIENTS, INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_CLIENTS | INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS },
453 { INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_HOST, INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_HOST | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST },
454 { INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_WIRE, INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_WIRE | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE },
455 { INTNET_OPEN_FLAGS_TRUNK_HOST_DISABLED, INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_HOST_DISABLED | INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED },
456 { INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE, INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE | INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE },
457 { INTNET_OPEN_FLAGS_TRUNK_WIRE_DISABLED, INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_WIRE_DISABLED | INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED },
458 { INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE, INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE | INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE },
459},
460/** Open network policy flags relating to the new interface. */
461g_afIntNetOpenNetworkIfFlags[] =
462{
463 { INTNET_OPEN_FLAGS_IF_PROMISC_DENY, INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW, INTNET_OPEN_FLAGS_IF_FIXED, INTNET_OPEN_FLAGS_IF_PROMISC_DENY | INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW },
464 { INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK, INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK, INTNET_OPEN_FLAGS_IF_FIXED, INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK },
465};
466
467
468/*********************************************************************************************************************************
469* Forward Declarations *
470*********************************************************************************************************************************/
471static void intnetR0TrunkIfDestroy(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork);
472
473
474/**
475 * Checks if a pointer belongs to the list of known networks without
476 * accessing memory it points to.
477 *
478 * @returns true, if such network is in the list.
479 * @param pIntNet The pointer to the internal network instance (global).
480 * @param pNetwork The pointer that must be validated.
481 */
482DECLINLINE(bool) intnetR0NetworkIsValid(PINTNET pIntNet, PINTNETNETWORK pNetwork)
483{
484 for (PINTNETNETWORK pCurr = pIntNet->pNetworks; pCurr; pCurr = pCurr->pNext)
485 if (pCurr == pNetwork)
486 return true;
487 return false;
488}
489
490
491/**
492 * Worker for intnetR0SgWritePart that deals with the case where the
493 * request doesn't fit into the first segment.
494 *
495 * @returns true, unless the request or SG invalid.
496 * @param pSG The SG list to write to.
497 * @param off Where to start writing (offset into the SG).
498 * @param cb How much to write.
499 * @param pvBuf The buffer to containing the bits to write.
500 */
501static bool intnetR0SgWritePartSlow(PCINTNETSG pSG, uint32_t off, uint32_t cb, void const *pvBuf)
502{
503 if (RT_UNLIKELY(off + cb > pSG->cbTotal))
504 return false;
505
506 /*
507 * Skip ahead to the segment where off starts.
508 */
509 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
510 unsigned iSeg = 0;
511 while (off > pSG->aSegs[iSeg].cb)
512 {
513 off -= pSG->aSegs[iSeg++].cb;
514 AssertReturn(iSeg < cSegs, false);
515 }
516
517 /*
518 * Copy the data, hoping that it's all from one segment...
519 */
520 uint32_t cbCanCopy = pSG->aSegs[iSeg].cb - off;
521 if (cbCanCopy >= cb)
522 memcpy((uint8_t *)pSG->aSegs[iSeg].pv + off, pvBuf, cb);
523 else
524 {
525 /* copy the portion in the current segment. */
526 memcpy((uint8_t *)pSG->aSegs[iSeg].pv + off, pvBuf, cbCanCopy);
527 cb -= cbCanCopy;
528
529 /* copy the portions in the other segments. */
530 do
531 {
532 pvBuf = (uint8_t const *)pvBuf + cbCanCopy;
533 iSeg++;
534 AssertReturn(iSeg < cSegs, false);
535
536 cbCanCopy = RT_MIN(cb, pSG->aSegs[iSeg].cb);
537 memcpy(pSG->aSegs[iSeg].pv, pvBuf, cbCanCopy);
538
539 cb -= cbCanCopy;
540 } while (cb > 0);
541 }
542
543 return true;
544}
545
546
547/**
548 * Writes to a part of an SG.
549 *
550 * @returns true on success, false on failure (out of bounds).
551 * @param pSG The SG list to write to.
552 * @param off Where to start writing (offset into the SG).
553 * @param cb How much to write.
554 * @param pvBuf The buffer to containing the bits to write.
555 */
556DECLINLINE(bool) intnetR0SgWritePart(PCINTNETSG pSG, uint32_t off, uint32_t cb, void const *pvBuf)
557{
558 Assert(off + cb > off);
559
560 /* The optimized case. */
561 if (RT_LIKELY( pSG->cSegsUsed == 1
562 || pSG->aSegs[0].cb >= off + cb))
563 {
564 Assert(pSG->cbTotal == pSG->aSegs[0].cb);
565 memcpy((uint8_t *)pSG->aSegs[0].pv + off, pvBuf, cb);
566 return true;
567 }
568 return intnetR0SgWritePartSlow(pSG, off, cb, pvBuf);
569}
570
571
572/**
573 * Reads a byte from a SG list.
574 *
575 * @returns The byte on success. 0xff on failure.
576 * @param pSG The SG list to read.
577 * @param off The offset (into the SG) off the byte.
578 */
579DECLINLINE(uint8_t) intnetR0SgReadByte(PCINTNETSG pSG, uint32_t off)
580{
581 if (RT_LIKELY(pSG->aSegs[0].cb > off))
582 return ((uint8_t const *)pSG->aSegs[0].pv)[off];
583
584 off -= pSG->aSegs[0].cb;
585 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
586 for (unsigned iSeg = 1; iSeg < cSegs; iSeg++)
587 {
588 if (pSG->aSegs[iSeg].cb > off)
589 return ((uint8_t const *)pSG->aSegs[iSeg].pv)[off];
590 off -= pSG->aSegs[iSeg].cb;
591 }
592 return false;
593}
594
595
596/**
597 * Worker for intnetR0SgReadPart that deals with the case where the
598 * requested data isn't in the first segment.
599 *
600 * @returns true, unless the SG is invalid.
601 * @param pSG The SG list to read.
602 * @param off Where to start reading (offset into the SG).
603 * @param cb How much to read.
604 * @param pvBuf The buffer to read into.
605 */
606static bool intnetR0SgReadPartSlow(PCINTNETSG pSG, uint32_t off, uint32_t cb, void *pvBuf)
607{
608 if (RT_UNLIKELY(off + cb > pSG->cbTotal))
609 return false;
610
611 /*
612 * Skip ahead to the segment where off starts.
613 */
614 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
615 unsigned iSeg = 0;
616 while (off > pSG->aSegs[iSeg].cb)
617 {
618 off -= pSG->aSegs[iSeg++].cb;
619 AssertReturn(iSeg < cSegs, false);
620 }
621
622 /*
623 * Copy the data, hoping that it's all from one segment...
624 */
625 uint32_t cbCanCopy = pSG->aSegs[iSeg].cb - off;
626 if (cbCanCopy >= cb)
627 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv + off, cb);
628 else
629 {
630 /* copy the portion in the current segment. */
631 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv + off, cbCanCopy);
632 cb -= cbCanCopy;
633
634 /* copy the portions in the other segments. */
635 do
636 {
637 pvBuf = (uint8_t *)pvBuf + cbCanCopy;
638 iSeg++;
639 AssertReturn(iSeg < cSegs, false);
640
641 cbCanCopy = RT_MIN(cb, pSG->aSegs[iSeg].cb);
642 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv, cbCanCopy);
643
644 cb -= cbCanCopy;
645 } while (cb > 0);
646 }
647
648 return true;
649}
650
651
652/**
653 * Reads a part of an SG into a buffer.
654 *
655 * @returns true on success, false on failure (out of bounds).
656 * @param pSG The SG list to read.
657 * @param off Where to start reading (offset into the SG).
658 * @param cb How much to read.
659 * @param pvBuf The buffer to read into.
660 */
661DECLINLINE(bool) intnetR0SgReadPart(PCINTNETSG pSG, uint32_t off, uint32_t cb, void *pvBuf)
662{
663 Assert(off + cb > off);
664
665 /* The optimized case. */
666 if (RT_LIKELY(pSG->aSegs[0].cb >= off + cb))
667 {
668 AssertMsg(pSG->cbTotal >= pSG->aSegs[0].cb, ("%#x vs %#x\n", pSG->cbTotal, pSG->aSegs[0].cb));
669 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[0].pv + off, cb);
670 return true;
671 }
672 return intnetR0SgReadPartSlow(pSG, off, cb, pvBuf);
673}
674
675
676/**
677 * Wait for a busy counter to reach zero.
678 *
679 * @param pNetwork The network.
680 * @param pcBusy The busy counter.
681 */
682static void intnetR0BusyWait(PINTNETNETWORK pNetwork, uint32_t volatile *pcBusy)
683{
684 if (ASMAtomicReadU32(pcBusy) == 0)
685 return;
686
687 /*
688 * We have to be a bit cautious here so we don't destroy the network or the
689 * semaphore before intnetR0BusyDec has signalled us.
690 */
691
692 /* Reset the semaphore and flip the wakeup bit. */
693 RTSemEventWait(pNetwork->hEvtBusyIf, 0); /* clear it */
694 uint32_t cCurBusy = ASMAtomicReadU32(pcBusy);
695 do
696 {
697 if (cCurBusy == 0)
698 return;
699 AssertMsg(!(cCurBusy & INTNET_BUSY_WAKEUP_MASK), ("%#x\n", cCurBusy));
700 AssertMsg((cCurBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cCurBusy));
701 } while (!ASMAtomicCmpXchgExU32(pcBusy, cCurBusy | INTNET_BUSY_WAKEUP_MASK, cCurBusy, &cCurBusy));
702
703 /* Wait for the count to reach zero. */
704 do
705 {
706 int rc2 = RTSemEventWait(pNetwork->hEvtBusyIf, 30000); NOREF(rc2);
707 //AssertMsg(RT_SUCCESS(rc2), ("rc=%Rrc *pcBusy=%#x (%#x)\n", rc2, ASMAtomicReadU32(pcBusy), cCurBusy ));
708 cCurBusy = ASMAtomicReadU32(pcBusy);
709 AssertMsg((cCurBusy & INTNET_BUSY_WAKEUP_MASK), ("%#x\n", cCurBusy));
710 AssertMsg((cCurBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cCurBusy));
711 } while ( cCurBusy != INTNET_BUSY_WAKEUP_MASK
712 || !ASMAtomicCmpXchgU32(pcBusy, 0, INTNET_BUSY_WAKEUP_MASK));
713}
714
715
716/**
717 * Decrements the busy counter and maybe wakes up any threads waiting for it to
718 * reach zero.
719 *
720 * @param pNetwork The network.
721 * @param pcBusy The busy counter.
722 */
723DECLINLINE(void) intnetR0BusyDec(PINTNETNETWORK pNetwork, uint32_t volatile *pcBusy)
724{
725 uint32_t cNewBusy = ASMAtomicDecU32(pcBusy);
726 if (RT_UNLIKELY( cNewBusy == INTNET_BUSY_WAKEUP_MASK
727 && pNetwork))
728 RTSemEventSignal(pNetwork->hEvtBusyIf);
729 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
730}
731
732
733/**
734 * Increments the busy count of the specified interface.
735 *
736 * The caller must own the MAC address table spinlock.
737 *
738 * @param pIf The interface.
739 */
740DECLINLINE(void) intnetR0BusyDecIf(PINTNETIF pIf)
741{
742 intnetR0BusyDec(pIf->pNetwork, &pIf->cBusy);
743}
744
745
746/**
747 * Increments the busy count of the specified interface.
748 *
749 * The caller must own the MAC address table spinlock or an explicity reference.
750 *
751 * @param pTrunk The trunk.
752 */
753DECLINLINE(void) intnetR0BusyDecTrunk(PINTNETTRUNKIF pTrunk)
754{
755 if (pTrunk)
756 intnetR0BusyDec(pTrunk->pNetwork, &pTrunk->cBusy);
757}
758
759
760/**
761 * Increments the busy count of the specified interface.
762 *
763 * The caller must own the MAC address table spinlock or an explicity reference.
764 *
765 * @param pIf The interface.
766 */
767DECLINLINE(void) intnetR0BusyIncIf(PINTNETIF pIf)
768{
769 uint32_t cNewBusy = ASMAtomicIncU32(&pIf->cBusy);
770 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
771 NOREF(cNewBusy);
772}
773
774
775/**
776 * Increments the busy count of the specified interface.
777 *
778 * The caller must own the MAC address table spinlock or an explicity reference.
779 *
780 * @param pTrunk The trunk.
781 */
782DECLINLINE(void) intnetR0BusyIncTrunk(PINTNETTRUNKIF pTrunk)
783{
784 if (!pTrunk) return;
785 uint32_t cNewBusy = ASMAtomicIncU32(&pTrunk->cBusy);
786 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
787 NOREF(cNewBusy);
788}
789
790
791/**
792 * Retain an interface.
793 *
794 * @returns VBox status code, can assume success in most situations.
795 * @param pIf The interface instance.
796 * @param pSession The current session.
797 */
798DECLINLINE(int) intnetR0IfRetain(PINTNETIF pIf, PSUPDRVSESSION pSession)
799{
800 Assert(pIf->hDestructorThread == NIL_RTNATIVETHREAD);
801
802 int rc = SUPR0ObjAddRefEx(pIf->pvObj, pSession, true /* fNoBlocking */);
803 AssertRCReturn(rc, rc);
804
805 return VINF_SUCCESS;
806}
807
808
809/**
810 * Release an interface previously retained by intnetR0IfRetain or
811 * by handle lookup/freeing.
812 *
813 * @returns true if destroyed, false if not.
814 * @param pIf The interface instance.
815 * @param pSession The current session.
816 */
817DECLINLINE(bool) intnetR0IfRelease(PINTNETIF pIf, PSUPDRVSESSION pSession)
818{
819 Assert(pIf->hDestructorThread == NIL_RTNATIVETHREAD);
820
821 int rc = SUPR0ObjRelease(pIf->pvObj, pSession);
822 AssertRC(rc);
823
824 return rc == VINF_OBJECT_DESTROYED;
825}
826
827
828/**
829 * RTHandleCreateEx callback that retains an object in the
830 * handle table before returning it.
831 *
832 * (Avoids racing the freeing of the handle.)
833 *
834 * @returns VBox status code.
835 * @param hHandleTable The handle table (ignored).
836 * @param pvObj The object (INTNETIF).
837 * @param pvCtx The context (SUPDRVSESSION).
838 * @param pvUser The user context (ignored).
839 */
840static DECLCALLBACK(int) intnetR0IfRetainHandle(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
841{
842 NOREF(pvUser);
843 NOREF(hHandleTable);
844
845 PINTNETIF pIf = (PINTNETIF)pvObj;
846 RTNATIVETHREAD hDtorThrd;
847 ASMAtomicUoReadHandle(&pIf->hDestructorThread, &hDtorThrd);
848 if (hDtorThrd == NIL_RTNATIVETHREAD)
849 return intnetR0IfRetain(pIf, (PSUPDRVSESSION)pvCtx);
850
851 /* Allow intnetR0IfDestruct to call RTHandleTableFreeWithCtx to free
852 the handle, but not even think about retaining a referenceas we don't
853 want to confuse SUPDrv and risk having the destructor called twice. */
854 if (hDtorThrd == RTThreadNativeSelf())
855 return VINF_SUCCESS;
856
857 return VERR_SEM_DESTROYED;
858}
859
860
861
862/**
863 * Checks if the interface has a usable MAC address or not.
864 *
865 * @returns true if MacAddr is usable, false if not.
866 * @param pIf The interface.
867 */
868DECL_FORCE_INLINE(bool) intnetR0IfHasMacAddr(PINTNETIF pIf)
869{
870 return pIf->fMacSet || !(pIf->MacAddr.au8[0] & 1);
871}
872
873
874/**
875 * Locates the MAC address table entry for the given interface.
876 *
877 * The caller holds the MAC address table spinlock, obviously.
878 *
879 * @returns Pointer to the entry on if found, NULL if not.
880 * @param pNetwork The network.
881 * @param pIf The interface.
882 */
883DECLINLINE(PINTNETMACTABENTRY) intnetR0NetworkFindMacAddrEntry(PINTNETNETWORK pNetwork, PINTNETIF pIf)
884{
885 uint32_t iIf = pNetwork->MacTab.cEntries;
886 while (iIf-- > 0)
887 {
888 if (pNetwork->MacTab.paEntries[iIf].pIf == pIf)
889 return &pNetwork->MacTab.paEntries[iIf];
890 }
891 return NULL;
892}
893
894
895/**
896 * Checks if the IPv6 address is a good interface address.
897 * @returns true/false.
898 * @param addr The address, network endian.
899 */
900DECLINLINE(bool) intnetR0IPv6AddrIsGood(RTNETADDRIPV6 addr)
901{
902 return !( ( addr.QWords.qw0 == 0 && addr.QWords.qw1 == 0) /* :: */
903 || ( (addr.Words.w0 & RT_H2BE_U16(0xff00)) == RT_H2BE_U16(0xff00)) /* multicast */
904 || ( addr.Words.w0 == 0 && addr.Words.w1 == 0
905 && addr.Words.w2 == 0 && addr.Words.w3 == 0
906 && addr.Words.w4 == 0 && addr.Words.w5 == 0
907 && addr.Words.w6 == 0 && addr.Words.w7 == RT_H2BE_U16(0x0001))); /* ::1 */
908}
909
910
911#if 0 /* unused */
912/**
913 * Checks if the IPv4 address is a broadcast address.
914 * @returns true/false.
915 * @param Addr The address, network endian.
916 */
917DECLINLINE(bool) intnetR0IPv4AddrIsBroadcast(RTNETADDRIPV4 Addr)
918{
919 /* Just check for 255.255.255.255 atm. */
920 return Addr.u == UINT32_MAX;
921}
922#endif /* unused */
923
924
925/**
926 * Checks if the IPv4 address is a good interface address.
927 * @returns true/false.
928 * @param Addr The address, network endian.
929 */
930DECLINLINE(bool) intnetR0IPv4AddrIsGood(RTNETADDRIPV4 Addr)
931{
932 /* Usual suspects. */
933 if ( Addr.u == UINT32_MAX /* 255.255.255.255 - broadcast. */
934 || Addr.au8[0] == 0) /* Current network, can be used as source address. */
935 return false;
936
937 /* Unusual suspects. */
938 if (RT_UNLIKELY( Addr.au8[0] == 127 /* Loopback */
939 || (Addr.au8[0] & 0xf0) == 224 /* Multicast */
940 ))
941 return false;
942 return true;
943}
944
945
946/**
947 * Gets the address size of a network layer type.
948 *
949 * @returns size in bytes.
950 * @param enmType The type.
951 */
952DECLINLINE(uint8_t) intnetR0AddrSize(INTNETADDRTYPE enmType)
953{
954 switch (enmType)
955 {
956 case kIntNetAddrType_IPv4: return 4;
957 case kIntNetAddrType_IPv6: return 16;
958 case kIntNetAddrType_IPX: return 4 + 6;
959 default: AssertFailedReturn(0);
960 }
961}
962
963
964/**
965 * Compares two address to see if they are equal, assuming naturally align structures.
966 *
967 * @returns true if equal, false if not.
968 * @param pAddr1 The first address.
969 * @param pAddr2 The second address.
970 * @param cbAddr The address size.
971 */
972DECLINLINE(bool) intnetR0AddrUIsEqualEx(PCRTNETADDRU pAddr1, PCRTNETADDRU pAddr2, uint8_t const cbAddr)
973{
974 switch (cbAddr)
975 {
976 case 4: /* IPv4 */
977 return pAddr1->au32[0] == pAddr2->au32[0];
978 case 16: /* IPv6 */
979 return pAddr1->au64[0] == pAddr2->au64[0]
980 && pAddr1->au64[1] == pAddr2->au64[1];
981 case 10: /* IPX */
982 return pAddr1->au64[0] == pAddr2->au64[0]
983 && pAddr1->au16[4] == pAddr2->au16[4];
984 default:
985 AssertFailedReturn(false);
986 }
987}
988
989
990/**
991 * Worker for intnetR0IfAddrCacheLookup that performs the lookup
992 * in the remaining cache entries after the caller has check the
993 * most likely ones.
994 *
995 * @returns -1 if not found, the index of the cache entry if found.
996 * @param pCache The cache.
997 * @param pAddr The address.
998 * @param cbAddr The address size (optimization).
999 */
1000static int intnetR0IfAddrCacheLookupSlow(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1001{
1002 unsigned i = pCache->cEntries - 2;
1003 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1004 while (i >= 1)
1005 {
1006 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
1007 return i;
1008 pbEntry -= pCache->cbEntry;
1009 i--;
1010 }
1011
1012 return -1;
1013}
1014
1015/**
1016 * Lookup an address in a cache without any expectations.
1017 *
1018 * @returns -1 if not found, the index of the cache entry if found.
1019 * @param pCache The cache.
1020 * @param pAddr The address.
1021 * @param cbAddr The address size (optimization).
1022 */
1023DECLINLINE(int) intnetR0IfAddrCacheLookup(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1024{
1025 Assert(pCache->cbAddress == cbAddr);
1026
1027 /*
1028 * The optimized case is when there is one cache entry and
1029 * it doesn't match.
1030 */
1031 unsigned i = pCache->cEntries;
1032 if ( i > 0
1033 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr))
1034 return 0;
1035 if (i <= 1)
1036 return -1;
1037
1038 /*
1039 * Check the last entry.
1040 */
1041 i--;
1042 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr))
1043 return i;
1044 if (i <= 1)
1045 return -1;
1046
1047 return intnetR0IfAddrCacheLookupSlow(pCache, pAddr, cbAddr);
1048}
1049
1050
1051/** Same as intnetR0IfAddrCacheLookup except we expect the address to be present already. */
1052DECLINLINE(int) intnetR0IfAddrCacheLookupLikely(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1053{
1054 /** @todo implement this. */
1055 return intnetR0IfAddrCacheLookup(pCache, pAddr, cbAddr);
1056}
1057
1058#if 0 /* unused */
1059
1060/**
1061 * Worker for intnetR0IfAddrCacheLookupUnlikely that performs
1062 * the lookup in the remaining cache entries after the caller
1063 * has check the most likely ones.
1064 *
1065 * The routine is expecting not to find the address.
1066 *
1067 * @returns -1 if not found, the index of the cache entry if found.
1068 * @param pCache The cache.
1069 * @param pAddr The address.
1070 * @param cbAddr The address size (optimization).
1071 */
1072static int intnetR0IfAddrCacheInCacheUnlikelySlow(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1073{
1074 /*
1075 * Perform a full table lookup.
1076 */
1077 unsigned i = pCache->cEntries - 2;
1078 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1079 while (i >= 1)
1080 {
1081 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1082 return i;
1083 pbEntry -= pCache->cbEntry;
1084 i--;
1085 }
1086
1087 return -1;
1088}
1089
1090
1091/**
1092 * Lookup an address in a cache expecting not to find it.
1093 *
1094 * @returns -1 if not found, the index of the cache entry if found.
1095 * @param pCache The cache.
1096 * @param pAddr The address.
1097 * @param cbAddr The address size (optimization).
1098 */
1099DECLINLINE(int) intnetR0IfAddrCacheLookupUnlikely(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1100{
1101 Assert(pCache->cbAddress == cbAddr);
1102
1103 /*
1104 * The optimized case is when there is one cache entry and
1105 * it doesn't match.
1106 */
1107 unsigned i = pCache->cEntries;
1108 if (RT_UNLIKELY( i > 0
1109 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr)))
1110 return 0;
1111 if (RT_LIKELY(i <= 1))
1112 return -1;
1113
1114 /*
1115 * Then check the last entry and return if there are just two cache entries.
1116 */
1117 i--;
1118 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr)))
1119 return i;
1120 if (i <= 1)
1121 return -1;
1122
1123 return intnetR0IfAddrCacheInCacheUnlikelySlow(pCache, pAddr, cbAddr);
1124}
1125
1126#endif /* unused */
1127
1128
1129/**
1130 * Deletes a specific cache entry.
1131 *
1132 * Worker for intnetR0NetworkAddrCacheDelete and intnetR0NetworkAddrCacheDeleteMinusIf.
1133 *
1134 * @param pIf The interface (for logging).
1135 * @param pCache The cache.
1136 * @param iEntry The entry to delete.
1137 * @param pszMsg Log message.
1138 */
1139static void intnetR0IfAddrCacheDeleteIt(PINTNETIF pIf, PINTNETADDRCACHE pCache, int iEntry, const char *pszMsg)
1140{
1141 AssertReturnVoid(iEntry < pCache->cEntries);
1142 AssertReturnVoid(iEntry >= 0);
1143#ifdef LOG_ENABLED
1144 INTNETADDRTYPE enmAddrType = (INTNETADDRTYPE)(uintptr_t)(pCache - &pIf->aAddrCache[0]);
1145 PCRTNETADDRU pAddr = (PCRTNETADDRU)(pCache->pbEntries + iEntry * pCache->cbEntry);
1146 switch (enmAddrType)
1147 {
1148 case kIntNetAddrType_IPv4:
1149 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%#x MAC=%.6Rhxs IPv4 deleted #%d %RTnaipv4 %s\n",
1150 pIf->hIf, &pIf->MacAddr, iEntry, pAddr->IPv4, pszMsg));
1151 break;
1152 case kIntNetAddrType_IPv6:
1153 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%#x MAC=%.6Rhxs IPv6 deleted #%d %RTnaipv6 %s\n",
1154 pIf->hIf, &pIf->MacAddr, iEntry, &pAddr->IPv6, pszMsg));
1155 break;
1156 default:
1157 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%RX32 MAC=%.6Rhxs type=%d #%d %.*Rhxs %s\n",
1158 pIf->hIf, &pIf->MacAddr, enmAddrType, iEntry, pCache->cbAddress, pAddr, pszMsg));
1159 break;
1160 }
1161#else
1162 RT_NOREF2(pIf, pszMsg);
1163#endif
1164
1165 pCache->cEntries--;
1166 if (iEntry < pCache->cEntries)
1167 memmove(pCache->pbEntries + iEntry * pCache->cbEntry,
1168 pCache->pbEntries + (iEntry + 1) * pCache->cbEntry,
1169 (pCache->cEntries - iEntry) * pCache->cbEntry);
1170}
1171
1172
1173/**
1174 * Deletes an address from the cache, assuming it isn't actually in the cache.
1175 *
1176 * May or may not own the spinlock when calling this.
1177 *
1178 * @param pIf The interface (for logging).
1179 * @param pCache The cache.
1180 * @param pAddr The address.
1181 * @param cbAddr The address size (optimization).
1182 */
1183DECLINLINE(void) intnetR0IfAddrCacheDelete(PINTNETIF pIf, PINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr, const char *pszMsg)
1184{
1185 int i = intnetR0IfAddrCacheLookup(pCache, pAddr, cbAddr);
1186 if (RT_UNLIKELY(i >= 0))
1187 intnetR0IfAddrCacheDeleteIt(pIf, pCache, i, pszMsg);
1188}
1189
1190
1191/**
1192 * Deletes the address from all the interface caches.
1193 *
1194 * This is used to remove stale entries that has been reassigned to
1195 * other machines on the network.
1196 *
1197 * @param pNetwork The network.
1198 * @param pAddr The address.
1199 * @param enmType The address type.
1200 * @param cbAddr The address size (optimization).
1201 * @param pszMsg Log message.
1202 */
1203DECLINLINE(void) intnetR0NetworkAddrCacheDeleteLocked(PINTNETNETWORK pNetwork,
1204 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType,
1205 uint8_t const cbAddr,
1206 const char *pszMsg)
1207{
1208 uint32_t iIf = pNetwork->MacTab.cEntries;
1209 while (iIf--)
1210 {
1211 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1212
1213 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1214 if (RT_UNLIKELY(i >= 0))
1215 intnetR0IfAddrCacheDeleteIt(pIf, &pIf->aAddrCache[enmType], i, pszMsg);
1216 }
1217}
1218
1219
1220/**
1221 * Deletes the address from all the interface caches.
1222 *
1223 * This is used to remove stale entries that has been reassigned to
1224 * other machines on the network.
1225 *
1226 * @param pNetwork The network.
1227 * @param pAddr The address.
1228 * @param enmType The address type.
1229 * @param cbAddr The address size (optimization).
1230 * @param pszMsg Log message.
1231 */
1232DECLINLINE(void) intnetR0NetworkAddrCacheDelete(PINTNETNETWORK pNetwork, PCRTNETADDRU pAddr, INTNETADDRTYPE const enmType,
1233 uint8_t const cbAddr, const char *pszMsg)
1234{
1235 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1236
1237 intnetR0NetworkAddrCacheDeleteLocked(pNetwork, pAddr, enmType, cbAddr, pszMsg);
1238
1239 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1240}
1241
1242
1243#if 0 /* unused */
1244/**
1245 * Deletes the address from all the interface caches except the specified one.
1246 *
1247 * This is used to remove stale entries that has been reassigned to
1248 * other machines on the network.
1249 *
1250 * @param pNetwork The network.
1251 * @param pAddr The address.
1252 * @param enmType The address type.
1253 * @param cbAddr The address size (optimization).
1254 * @param pszMsg Log message.
1255 */
1256DECLINLINE(void) intnetR0NetworkAddrCacheDeleteMinusIf(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, PCRTNETADDRU pAddr,
1257 INTNETADDRTYPE const enmType, uint8_t const cbAddr, const char *pszMsg)
1258{
1259 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1260
1261 uint32_t iIf = pNetwork->MacTab.cEntries;
1262 while (iIf--)
1263 {
1264 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1265 if (pIf != pIfSender)
1266 {
1267 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1268 if (RT_UNLIKELY(i >= 0))
1269 intnetR0IfAddrCacheDeleteIt(pIf, &pIf->aAddrCache[enmType], i, pszMsg);
1270 }
1271 }
1272
1273 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1274}
1275#endif /* unused */
1276
1277
1278/**
1279 * Lookup an address on the network, returning the (first) interface having it
1280 * in its address cache.
1281 *
1282 * @returns Pointer to the interface on success, NULL if not found. The caller
1283 * must release the interface by calling intnetR0BusyDecIf.
1284 * @param pNetwork The network.
1285 * @param pAddr The address to lookup.
1286 * @param enmType The address type.
1287 * @param cbAddr The size of the address.
1288 */
1289DECLINLINE(PINTNETIF) intnetR0NetworkAddrCacheLookupIf(PINTNETNETWORK pNetwork, PCRTNETADDRU pAddr, INTNETADDRTYPE const enmType, uint8_t const cbAddr)
1290{
1291 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1292
1293 uint32_t iIf = pNetwork->MacTab.cEntries;
1294 while (iIf--)
1295 {
1296 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1297 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1298 if (i >= 0)
1299 {
1300 intnetR0BusyIncIf(pIf);
1301 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1302 return pIf;
1303 }
1304 }
1305
1306 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1307 return NULL;
1308}
1309
1310
1311/**
1312 * Look up specified address in the network's blacklist.
1313 *
1314 * @param pNetwork The network.
1315 * @param enmType The address type.
1316 * @param pAddr The address.
1317 */
1318static bool intnetR0NetworkBlacklistLookup(PINTNETNETWORK pNetwork,
1319 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
1320{
1321 PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
1322
1323 if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
1324 return false;
1325
1326 const uint8_t cbAddr = pCache->cbAddress;
1327 Assert(cbAddr == intnetR0AddrSize(enmType));
1328
1329 for (unsigned i = 0; i < pCache->cEntries; ++i)
1330 {
1331 uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1332 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
1333 return true;
1334 }
1335
1336 return false;
1337}
1338
1339
1340/**
1341 * Deletes specified address from network's blacklist.
1342 *
1343 * @param pNetwork The network.
1344 * @param enmType The address type.
1345 * @param pAddr The address.
1346 */
1347static void intnetR0NetworkBlacklistDelete(PINTNETNETWORK pNetwork,
1348 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
1349{
1350 PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
1351
1352 if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
1353 return;
1354
1355 const uint8_t cbAddr = pCache->cbAddress;
1356 Assert(cbAddr == intnetR0AddrSize(enmType));
1357
1358 for (unsigned i = 0; i < pCache->cEntries; ++i)
1359 {
1360 uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1361 if (!intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
1362 continue;
1363
1364 --pCache->cEntries;
1365 memmove(pCache->pbEntries + i * pCache->cbEntry,
1366 pCache->pbEntries + (i + 1) * pCache->cbEntry,
1367 (pCache->cEntries - i) * pCache->cbEntry);
1368 return;
1369 }
1370}
1371
1372
1373/**
1374 * Adds specified address from network's blacklist.
1375 *
1376 * @param pNetwork The network.
1377 * @param enmType The address type.
1378 * @param pAddr The address.
1379 */
1380static void intnetR0NetworkBlacklistAdd(PINTNETNETWORK pNetwork,
1381 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
1382{
1383 PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
1384
1385 if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
1386 return;
1387
1388 const uint8_t cbAddr = pCache->cbAddress;
1389 Assert(cbAddr == intnetR0AddrSize(enmType));
1390
1391 /* lookup */
1392 for (unsigned i = 0; i < pCache->cEntries; ++i)
1393 {
1394 uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1395 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1396 return; /* already exists */
1397 }
1398
1399 if (pCache->cEntries >= pCache->cEntriesAlloc)
1400 {
1401 /* shift */
1402 memmove(pCache->pbEntries, pCache->pbEntries + pCache->cbEntry,
1403 pCache->cbEntry * (pCache->cEntries - 1));
1404 --pCache->cEntries;
1405 }
1406
1407 Assert(pCache->cEntries < pCache->cEntriesAlloc);
1408
1409 /* push */
1410 uint8_t *pbEntry = pCache->pbEntries + pCache->cEntries * pCache->cbEntry;
1411 memcpy(pbEntry, pAddr, cbAddr);
1412 memset(pbEntry + pCache->cbAddress, '\0', pCache->cbEntry - cbAddr);
1413 ++pCache->cEntries;
1414
1415 Assert(pCache->cEntries <= pCache->cEntriesAlloc);
1416}
1417
1418
1419/**
1420 * Adds an address to the cache, the caller is responsible for making sure it's
1421 * not already in the cache.
1422 *
1423 * The caller must not
1424 *
1425 * @param pIf The interface (for logging).
1426 * @param pCache The address cache.
1427 * @param pAddr The address.
1428 * @param pszMsg log message.
1429 */
1430static void intnetR0IfAddrCacheAddIt(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
1431 const char *pszMsg)
1432{
1433 PINTNETNETWORK pNetwork = pIf->pNetwork;
1434 AssertReturnVoid(pNetwork);
1435
1436 PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
1437
1438#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
1439 const uint8_t cbAddr = pCache->cbAddress;
1440 Assert(cbAddr == intnetR0AddrSize(enmAddrType));
1441#endif
1442
1443 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1444
1445 bool fBlacklisted = intnetR0NetworkBlacklistLookup(pNetwork, pAddr, enmAddrType);
1446 if (fBlacklisted)
1447 {
1448 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1449
1450#ifdef LOG_ENABLED
1451 switch (enmAddrType)
1452 {
1453 case kIntNetAddrType_IPv4:
1454 Log(("%s: spoofing attempt for %RTnaipv4\n",
1455 __FUNCTION__, pAddr->IPv4));
1456 break;
1457 case kIntNetAddrType_IPv6:
1458 Log(("%s: spoofing attempt for %RTnaipv6\n",
1459 __FUNCTION__, &pAddr->IPv6));
1460 break;
1461 default:
1462 Log(("%s: spoofing attempt for %.*Rhxs (type %d)\n",
1463 __FUNCTION__, cbAddr, pAddr, enmAddrType));
1464 break;
1465 }
1466#endif
1467 return;
1468 }
1469
1470 if (RT_UNLIKELY(!pCache->cEntriesAlloc))
1471 {
1472 /* This shouldn't happen*/
1473 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1474 return;
1475 }
1476
1477 /* When the table is full, drop the older entry (FIFO). Do proper ageing? */
1478 if (pCache->cEntries >= pCache->cEntriesAlloc)
1479 {
1480 Log(("intnetR0IfAddrCacheAddIt: type=%d replacing %.*Rhxs\n",
1481 (int)(uintptr_t)(pCache - &pIf->aAddrCache[0]), pCache->cbAddress, pCache->pbEntries));
1482 memmove(pCache->pbEntries, pCache->pbEntries + pCache->cbEntry, pCache->cbEntry * (pCache->cEntries - 1));
1483 pCache->cEntries--;
1484 Assert(pCache->cEntries < pCache->cEntriesAlloc);
1485 }
1486
1487 /*
1488 * Add the new entry to the end of the array.
1489 */
1490 uint8_t *pbEntry = pCache->pbEntries + pCache->cEntries * pCache->cbEntry;
1491 memcpy(pbEntry, pAddr, pCache->cbAddress);
1492 memset(pbEntry + pCache->cbAddress, '\0', pCache->cbEntry - pCache->cbAddress);
1493
1494#ifdef LOG_ENABLED
1495 switch (enmAddrType)
1496 {
1497 case kIntNetAddrType_IPv4:
1498 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs IPv4 added #%d %RTnaipv4 %s\n",
1499 pIf->hIf, &pIf->MacAddr, pCache->cEntries, pAddr->IPv4, pszMsg));
1500 break;
1501 case kIntNetAddrType_IPv6:
1502 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs IPv6 added #%d %RTnaipv6 %s\n",
1503 pIf->hIf, &pIf->MacAddr, pCache->cEntries, &pAddr->IPv6, pszMsg));
1504 break;
1505 default:
1506 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs type=%d added #%d %.*Rhxs %s\n",
1507 pIf->hIf, &pIf->MacAddr, enmAddrType, pCache->cEntries, pCache->cbAddress, pAddr, pszMsg));
1508 break;
1509 }
1510#else
1511 RT_NOREF1(pszMsg);
1512#endif
1513 pCache->cEntries++;
1514 Assert(pCache->cEntries <= pCache->cEntriesAlloc);
1515
1516 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1517}
1518
1519
1520/**
1521 * A intnetR0IfAddrCacheAdd worker that performs the rest of the lookup.
1522 *
1523 * @param pIf The interface (for logging).
1524 * @param pCache The address cache.
1525 * @param pAddr The address.
1526 * @param cbAddr The size of the address (optimization).
1527 * @param pszMsg Log message.
1528 */
1529static void intnetR0IfAddrCacheAddSlow(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
1530 const char *pszMsg)
1531{
1532 PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
1533
1534 const uint8_t cbAddr = pCache->cbAddress;
1535 Assert(cbAddr == intnetR0AddrSize(enmAddrType));
1536
1537 /*
1538 * Check all but the first and last entries, the caller
1539 * has already checked those.
1540 */
1541 int i = pCache->cEntries - 2;
1542 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry;
1543 while (i >= 1)
1544 {
1545 if (RT_LIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1546 return;
1547 pbEntry += pCache->cbEntry;
1548 i--;
1549 }
1550
1551 /*
1552 * Not found, add it.
1553 */
1554 intnetR0IfAddrCacheAddIt(pIf, enmAddrType, pAddr, pszMsg);
1555}
1556
1557
1558/**
1559 * Adds an address to the cache if it's not already there.
1560 *
1561 * Must not own any spinlocks when calling this function.
1562 *
1563 * @param pIf The interface (for logging).
1564 * @param pCache The address cache.
1565 * @param pAddr The address.
1566 * @param cbAddr The size of the address (optimization).
1567 * @param pszMsg Log message.
1568 */
1569DECLINLINE(void) intnetR0IfAddrCacheAdd(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
1570 const char *pszMsg)
1571{
1572 PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
1573
1574 const uint8_t cbAddr = pCache->cbAddress;
1575 Assert(cbAddr == intnetR0AddrSize(enmAddrType));
1576
1577 /*
1578 * The optimized case is when the address the first or last cache entry.
1579 */
1580 unsigned i = pCache->cEntries;
1581 if (RT_LIKELY( i > 0
1582 && ( intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr)
1583 || (i > 1
1584 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * (i-1)), pAddr, cbAddr))) ))
1585 return;
1586
1587 intnetR0IfAddrCacheAddSlow(pIf, enmAddrType, pAddr, pszMsg);
1588}
1589
1590
1591/**
1592 * Destroys the specified address cache.
1593 * @param pCache The address cache.
1594 */
1595static void intnetR0IfAddrCacheDestroy(PINTNETADDRCACHE pCache)
1596{
1597 void *pvFree = pCache->pbEntries;
1598 pCache->pbEntries = NULL;
1599 pCache->cEntries = 0;
1600 pCache->cEntriesAlloc = 0;
1601 RTMemFree(pvFree);
1602}
1603
1604
1605/**
1606 * Initialize the address cache for the specified address type.
1607 *
1608 * The cache storage is preallocated and fixed size so that we can handle
1609 * inserts from problematic contexts.
1610 *
1611 * @returns VINF_SUCCESS or VERR_NO_MEMORY.
1612 * @param pCache The cache to initialize.
1613 * @param enmAddrType The address type.
1614 * @param fEnabled Whether the address cache is enabled or not.
1615 */
1616static int intnetR0IfAddrCacheInit(PINTNETADDRCACHE pCache, INTNETADDRTYPE enmAddrType, bool fEnabled)
1617{
1618 pCache->cEntries = 0;
1619 pCache->cbAddress = intnetR0AddrSize(enmAddrType);
1620 pCache->cbEntry = RT_ALIGN(pCache->cbAddress, 4);
1621 if (fEnabled)
1622 {
1623 pCache->cEntriesAlloc = 32;
1624 pCache->pbEntries = (uint8_t *)RTMemAllocZ(pCache->cEntriesAlloc * pCache->cbEntry);
1625 if (!pCache->pbEntries)
1626 return VERR_NO_MEMORY;
1627 }
1628 else
1629 {
1630 pCache->cEntriesAlloc = 0;
1631 pCache->pbEntries = NULL;
1632 }
1633 return VINF_SUCCESS;
1634}
1635
1636
1637/**
1638 * Is it a multicast or broadcast MAC address?
1639 *
1640 * @returns true if multicast, false if not.
1641 * @param pMacAddr The address to inspect.
1642 */
1643DECL_FORCE_INLINE(bool) intnetR0IsMacAddrMulticast(PCRTMAC pMacAddr)
1644{
1645 return !!(pMacAddr->au8[0] & 0x01);
1646}
1647
1648
1649/**
1650 * Is it a dummy MAC address?
1651 *
1652 * We use dummy MAC addresses for interfaces which we don't know the MAC
1653 * address of because they haven't sent anything (learning) or explicitly set
1654 * it.
1655 *
1656 * @returns true if dummy, false if not.
1657 * @param pMacAddr The address to inspect.
1658 */
1659DECL_FORCE_INLINE(bool) intnetR0IsMacAddrDummy(PCRTMAC pMacAddr)
1660{
1661 /* The dummy address are broadcast addresses, don't bother check it all. */
1662 return pMacAddr->au16[0] == 0xffff;
1663}
1664
1665
1666/**
1667 * Compares two MAC addresses.
1668 *
1669 * @returns true if equal, false if not.
1670 * @param pDstAddr1 Address 1.
1671 * @param pDstAddr2 Address 2.
1672 */
1673DECL_FORCE_INLINE(bool) intnetR0AreMacAddrsEqual(PCRTMAC pDstAddr1, PCRTMAC pDstAddr2)
1674{
1675 return pDstAddr1->au16[2] == pDstAddr2->au16[2]
1676 && pDstAddr1->au16[1] == pDstAddr2->au16[1]
1677 && pDstAddr1->au16[0] == pDstAddr2->au16[0];
1678}
1679
1680
1681/**
1682 * Switch a unicast frame based on the network layer address (OSI level 3) and
1683 * return a destination table.
1684 *
1685 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1686 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1687 * @param pNetwork The network to switch on.
1688 * @param pDstMacAddr The destination MAC address.
1689 * @param enmL3AddrType The level-3 destination address type.
1690 * @param pL3Addr The level-3 destination address.
1691 * @param cbL3Addr The size of the level-3 destination address.
1692 * @param fSrc The frame source (INTNETTRUNKDIR_WIRE).
1693 * @param pDstTab The destination output table.
1694 */
1695static INTNETSWDECISION intnetR0NetworkSwitchLevel3(PINTNETNETWORK pNetwork, PCRTMAC pDstMacAddr,
1696 INTNETADDRTYPE enmL3AddrType, PCRTNETADDRU pL3Addr, uint8_t cbL3Addr,
1697 uint32_t fSrc, PINTNETDSTTAB pDstTab)
1698{
1699 Assert(fSrc == INTNETTRUNKDIR_WIRE);
1700
1701 /*
1702 * Grab the spinlock first and do the switching.
1703 */
1704 PINTNETMACTAB pTab = &pNetwork->MacTab;
1705 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1706
1707 pDstTab->fTrunkDst = 0;
1708 pDstTab->pTrunk = 0;
1709 pDstTab->cIfs = 0;
1710
1711 /* Find exactly matching or promiscuous interfaces. */
1712 uint32_t cExactHits = 0;
1713 uint32_t iIfMac = pTab->cEntries;
1714 while (iIfMac-- > 0)
1715 {
1716 if (pTab->paEntries[iIfMac].fActive)
1717 {
1718 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1719 bool fExact = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmL3AddrType], pL3Addr, cbL3Addr) >= 0;
1720 if (fExact || pTab->paEntries[iIfMac].fPromiscuousSeeTrunk)
1721 {
1722 cExactHits += fExact;
1723
1724 uint32_t iIfDst = pDstTab->cIfs++;
1725 pDstTab->aIfs[iIfDst].pIf = pIf;
1726 pDstTab->aIfs[iIfDst].fReplaceDstMac = fExact;
1727 intnetR0BusyIncIf(pIf);
1728
1729 if (fExact)
1730 pDstMacAddr = &pIf->MacAddr; /* Avoids duplicates being sent to the host. */
1731 }
1732 }
1733 }
1734
1735 /* Network only promicuous mode ifs should see related trunk traffic. */
1736 if ( cExactHits
1737 && fSrc
1738 && pNetwork->MacTab.cPromiscuousNoTrunkEntries)
1739 {
1740 iIfMac = pTab->cEntries;
1741 while (iIfMac-- > 0)
1742 {
1743 if ( pTab->paEntries[iIfMac].fActive
1744 && pTab->paEntries[iIfMac].fPromiscuousEff
1745 && !pTab->paEntries[iIfMac].fPromiscuousSeeTrunk)
1746 {
1747 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1748 if (intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmL3AddrType], pL3Addr, cbL3Addr) < 0)
1749 {
1750 uint32_t iIfDst = pDstTab->cIfs++;
1751 pDstTab->aIfs[iIfDst].pIf = pIf;
1752 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1753 intnetR0BusyIncIf(pIf);
1754 }
1755 }
1756 }
1757 }
1758
1759 /* Does it match the host, or is the host promiscuous? */
1760 if (pTab->fHostActive)
1761 {
1762 bool fExact = intnetR0AreMacAddrsEqual(&pTab->HostMac, pDstMacAddr);
1763 if ( fExact
1764 || intnetR0IsMacAddrDummy(&pTab->HostMac)
1765 || pTab->fHostPromiscuousEff)
1766 {
1767 cExactHits += fExact;
1768 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1769 }
1770 }
1771
1772 /* Hit the wire if there are no exact matches or if it's in promiscuous mode. */
1773 if (pTab->fWireActive && (!cExactHits || pTab->fWirePromiscuousEff))
1774 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1775 pDstTab->fTrunkDst &= ~fSrc;
1776 if (pDstTab->fTrunkDst)
1777 {
1778 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1779 pDstTab->pTrunk = pTrunk;
1780 intnetR0BusyIncTrunk(pTrunk);
1781 }
1782
1783 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1784 return pDstTab->cIfs
1785 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST)
1786 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK);
1787}
1788
1789
1790/**
1791 * Pre-switch a unicast MAC address.
1792 *
1793 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1794 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1795 * @param pNetwork The network to switch on.
1796 * @param fSrc The frame source.
1797 * @param pSrcAddr The source address of the frame.
1798 * @param pDstAddr The destination address of the frame.
1799 */
1800static INTNETSWDECISION intnetR0NetworkPreSwitchUnicast(PINTNETNETWORK pNetwork, uint32_t fSrc, PCRTMAC pSrcAddr,
1801 PCRTMAC pDstAddr)
1802{
1803 Assert(!intnetR0IsMacAddrMulticast(pDstAddr));
1804 Assert(fSrc);
1805
1806 /*
1807 * Grab the spinlock first and do the switching.
1808 */
1809 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_BROADCAST;
1810 PINTNETMACTAB pTab = &pNetwork->MacTab;
1811 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1812
1813 /* Iterate the internal network interfaces and look for matching source and
1814 destination addresses. */
1815 uint32_t iIfMac = pTab->cEntries;
1816 while (iIfMac-- > 0)
1817 {
1818 if (pTab->paEntries[iIfMac].fActive)
1819 {
1820 /* Unknown interface address? */
1821 if (intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr))
1822 break;
1823
1824 /* Paranoia - this shouldn't happen, right? */
1825 if ( pSrcAddr
1826 && intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pSrcAddr))
1827 break;
1828
1829 /* Exact match? */
1830 if (intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr))
1831 {
1832 enmSwDecision = pTab->fHostPromiscuousEff && fSrc == INTNETTRUNKDIR_WIRE
1833 ? INTNETSWDECISION_BROADCAST
1834 : INTNETSWDECISION_INTNET;
1835 break;
1836 }
1837 }
1838 }
1839
1840 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1841 return enmSwDecision;
1842}
1843
1844
1845/**
1846 * Switch a unicast MAC address and return a destination table.
1847 *
1848 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1849 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1850 * @param pNetwork The network to switch on.
1851 * @param fSrc The frame source.
1852 * @param pIfSender The sender interface, NULL if trunk. Used to
1853 * prevent sending an echo to the sender.
1854 * @param pDstAddr The destination address of the frame.
1855 * @param pDstTab The destination output table.
1856 */
1857static INTNETSWDECISION intnetR0NetworkSwitchUnicast(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETIF pIfSender,
1858 PCRTMAC pDstAddr, PINTNETDSTTAB pDstTab)
1859{
1860 AssertPtr(pDstTab);
1861 Assert(!intnetR0IsMacAddrMulticast(pDstAddr));
1862
1863 /*
1864 * Grab the spinlock first and do the switching.
1865 */
1866 PINTNETMACTAB pTab = &pNetwork->MacTab;
1867 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1868
1869 pDstTab->fTrunkDst = 0;
1870 pDstTab->pTrunk = 0;
1871 pDstTab->cIfs = 0;
1872
1873 /* Find exactly matching or promiscuous interfaces. */
1874 uint32_t cExactHits = 0;
1875 uint32_t iIfMac = pTab->cEntries;
1876 while (iIfMac-- > 0)
1877 {
1878 if (pTab->paEntries[iIfMac].fActive)
1879 {
1880 bool fExact = intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr);
1881 if ( fExact
1882 || intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr)
1883 || ( pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
1884 || (!fSrc && pTab->paEntries[iIfMac].fPromiscuousEff) )
1885 )
1886 {
1887 cExactHits += fExact;
1888
1889 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1890 if (RT_LIKELY(pIf != pIfSender)) /* paranoia */
1891 {
1892 uint32_t iIfDst = pDstTab->cIfs++;
1893 pDstTab->aIfs[iIfDst].pIf = pIf;
1894 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1895 intnetR0BusyIncIf(pIf);
1896 }
1897 }
1898 }
1899 }
1900
1901 /* Network only promicuous mode ifs should see related trunk traffic. */
1902 if ( cExactHits
1903 && fSrc
1904 && pNetwork->MacTab.cPromiscuousNoTrunkEntries)
1905 {
1906 iIfMac = pTab->cEntries;
1907 while (iIfMac-- > 0)
1908 {
1909 if ( pTab->paEntries[iIfMac].fPromiscuousEff
1910 && !pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
1911 && pTab->paEntries[iIfMac].fActive
1912 && !intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr)
1913 && !intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr) )
1914 {
1915 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1916 uint32_t iIfDst = pDstTab->cIfs++;
1917 pDstTab->aIfs[iIfDst].pIf = pIf;
1918 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1919 intnetR0BusyIncIf(pIf);
1920 }
1921 }
1922 }
1923
1924 /* Does it match the host, or is the host promiscuous? */
1925 if ( fSrc != INTNETTRUNKDIR_HOST
1926 && pTab->fHostActive)
1927 {
1928 bool fExact = intnetR0AreMacAddrsEqual(&pTab->HostMac, pDstAddr);
1929 if ( fExact
1930 || intnetR0IsMacAddrDummy(&pTab->HostMac)
1931 || pTab->fHostPromiscuousEff)
1932 {
1933 cExactHits += fExact;
1934 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1935 }
1936 }
1937
1938 /* Hit the wire if there are no exact matches or if it's in promiscuous mode. */
1939 if ( fSrc != INTNETTRUNKDIR_WIRE
1940 && pTab->fWireActive
1941 && (!cExactHits || pTab->fWirePromiscuousEff)
1942 )
1943 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1944
1945 /* Grab the trunk if we're sending to it. */
1946 if (pDstTab->fTrunkDst)
1947 {
1948 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1949 pDstTab->pTrunk = pTrunk;
1950 intnetR0BusyIncTrunk(pTrunk);
1951 }
1952
1953 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1954 return pDstTab->cIfs
1955 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST)
1956 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK);
1957}
1958
1959
1960/**
1961 * Create a destination table for a broadcast frame.
1962 *
1963 * @returns INTNETSWDECISION_BROADCAST.
1964 * @param pNetwork The network to switch on.
1965 * @param fSrc The frame source.
1966 * @param pIfSender The sender interface, NULL if trunk. Used to
1967 * prevent sending an echo to the sender.
1968 * @param pDstTab The destination output table.
1969 */
1970static INTNETSWDECISION intnetR0NetworkSwitchBroadcast(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETIF pIfSender,
1971 PINTNETDSTTAB pDstTab)
1972{
1973 AssertPtr(pDstTab);
1974
1975 /*
1976 * Grab the spinlock first and record all active interfaces.
1977 */
1978 PINTNETMACTAB pTab = &pNetwork->MacTab;
1979 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1980
1981 pDstTab->fTrunkDst = 0;
1982 pDstTab->pTrunk = 0;
1983 pDstTab->cIfs = 0;
1984
1985 /* Regular interfaces. */
1986 uint32_t iIfMac = pTab->cEntries;
1987 while (iIfMac-- > 0)
1988 {
1989 if (pTab->paEntries[iIfMac].fActive)
1990 {
1991 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1992 if (pIf != pIfSender)
1993 {
1994 uint32_t iIfDst = pDstTab->cIfs++;
1995 pDstTab->aIfs[iIfDst].pIf = pIf;
1996 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1997 intnetR0BusyIncIf(pIf);
1998 }
1999 }
2000 }
2001
2002 /* The trunk interface. */
2003 if (pTab->fHostActive)
2004 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
2005 if (pTab->fWireActive)
2006 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
2007 pDstTab->fTrunkDst &= ~fSrc;
2008 if (pDstTab->fTrunkDst)
2009 {
2010 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
2011 pDstTab->pTrunk = pTrunk;
2012 intnetR0BusyIncTrunk(pTrunk);
2013 }
2014
2015 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2016 return INTNETSWDECISION_BROADCAST;
2017}
2018
2019
2020/**
2021 * Create a destination table with the trunk and any promiscuous interfaces.
2022 *
2023 * This is only used in a fallback case of the level-3 switching, so we can
2024 * assume the wire as source and skip the sender interface filtering.
2025 *
2026 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
2027 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
2028 * @param pNetwork The network to switch on.
2029 * @param fSrc The frame source.
2030 * @param pDstTab The destination output table.
2031 */
2032static INTNETSWDECISION intnetR0NetworkSwitchTrunkAndPromisc(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETDSTTAB pDstTab)
2033{
2034 Assert(fSrc == INTNETTRUNKDIR_WIRE);
2035
2036 /*
2037 * Grab the spinlock first and do the switching.
2038 */
2039 PINTNETMACTAB pTab = &pNetwork->MacTab;
2040 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2041
2042 pDstTab->fTrunkDst = 0;
2043 pDstTab->pTrunk = 0;
2044 pDstTab->cIfs = 0;
2045
2046 /* Find promiscuous interfaces. */
2047 uint32_t iIfMac = pTab->cEntries;
2048 while (iIfMac-- > 0)
2049 {
2050 if ( pTab->paEntries[iIfMac].fActive
2051 && ( pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
2052 || (!fSrc && pTab->paEntries[iIfMac].fPromiscuousEff) )
2053 )
2054 {
2055 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
2056 uint32_t iIfDst = pDstTab->cIfs++;
2057 pDstTab->aIfs[iIfDst].pIf = pIf;
2058 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
2059 intnetR0BusyIncIf(pIf);
2060 }
2061 }
2062
2063 /* The trunk interface. */
2064 if (pTab->fHostActive)
2065 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
2066 if (pTab->fWireActive)
2067 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
2068 pDstTab->fTrunkDst &= ~fSrc;
2069 if (pDstTab->fTrunkDst)
2070 {
2071 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
2072 pDstTab->pTrunk = pTrunk;
2073 intnetR0BusyIncTrunk(pTrunk);
2074 }
2075
2076 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2077 return !pDstTab->cIfs
2078 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK)
2079 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST);
2080}
2081
2082
2083/**
2084 * Create a destination table for a trunk frame.
2085 *
2086 * @returns INTNETSWDECISION_BROADCAST.
2087 * @param pNetwork The network to switch on.
2088 * @param fSrc The frame source.
2089 * @param pDstTab The destination output table.
2090 */
2091static INTNETSWDECISION intnetR0NetworkSwitchTrunk(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETDSTTAB pDstTab)
2092{
2093 AssertPtr(pDstTab);
2094
2095 /*
2096 * Grab the spinlock first and record all active interfaces.
2097 */
2098 PINTNETMACTAB pTab= &pNetwork->MacTab;
2099 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2100
2101 pDstTab->fTrunkDst = 0;
2102 pDstTab->pTrunk = 0;
2103 pDstTab->cIfs = 0;
2104
2105 /* The trunk interface. */
2106 if (pTab->fHostActive)
2107 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
2108 if (pTab->fWireActive)
2109 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
2110 pDstTab->fTrunkDst &= ~fSrc;
2111 if (pDstTab->fTrunkDst)
2112 {
2113 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
2114 pDstTab->pTrunk = pTrunk;
2115 intnetR0BusyIncTrunk(pTrunk);
2116 }
2117
2118 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2119 return pDstTab->fTrunkDst ? INTNETSWDECISION_TRUNK : INTNETSWDECISION_DROP;
2120}
2121
2122
2123/**
2124 * Wrapper around RTMemAlloc for allocating a destination table.
2125 *
2126 * @returns VINF_SUCCESS or VERR_NO_MEMORY.
2127 * @param cEntries The size given as an entry count.
2128 * @param ppDstTab Where to store the pointer (always).
2129 */
2130DECLINLINE(int) intnetR0AllocDstTab(uint32_t cEntries, PINTNETDSTTAB *ppDstTab)
2131{
2132 PINTNETDSTTAB pDstTab;
2133 *ppDstTab = pDstTab = (PINTNETDSTTAB)RTMemAlloc(RT_UOFFSETOF_DYN(INTNETDSTTAB, aIfs[cEntries]));
2134 if (RT_UNLIKELY(!pDstTab))
2135 return VERR_NO_MEMORY;
2136 return VINF_SUCCESS;
2137}
2138
2139
/**
 * Ensures that there is space for another interface in the MAC address lookup
 * table as well as all the destination tables.
 *
 * When the MAC table is full, the tables are grown by
 * INTNET_GROW_DSTTAB_SIZE entries in this order: the destination table of
 * each interface, the destination tables of the trunk (if any), and finally
 * the MAC address table itself.  If an allocation fails part way through,
 * the tables that were already replaced stay replaced (there is no
 * rollback) and the MAC table is left untouched.
 *
 * The caller must own the create/open/destroy mutex.
 *
 * @returns VINF_SUCCESS, VERR_NO_MEMORY or VERR_OUT_OF_RANGE.
 *          VERR_INTERNAL_ERROR_2 if the entry counters are inconsistent.
 * @param   pNetwork    The network to operate on.
 */
static int intnetR0NetworkEnsureTabSpace(PINTNETNETWORK pNetwork)
{
    /*
     * The cEntries and cEntriesAllocated members are only updated while
     * owning the big mutex, so we only need the spinlock when doing the
     * actual table replacing.
     */
    PINTNETMACTAB pTab = &pNetwork->MacTab;
    int rc = VINF_SUCCESS;
    AssertReturn(pTab->cEntries <= pTab->cEntriesAllocated, VERR_INTERNAL_ERROR_2);
    if (pTab->cEntries + 1 > pTab->cEntriesAllocated)
    {
        /* Grow in fixed steps, but never beyond the interface limit. */
        uint32_t const cAllocated = pTab->cEntriesAllocated + INTNET_GROW_DSTTAB_SIZE;
        if (cAllocated <= INTNET_MAX_IFS)
        {
            /*
             * Resize the destination tables first, this can be kind of tedious.
             */
            for (uint32_t i = 0; i < pTab->cEntries; i++)
            {
                PINTNETIF pIf = pTab->paEntries[i].pIf; AssertPtr(pIf);
                PINTNETDSTTAB pNew;
                rc = intnetR0AllocDstTab(cAllocated, &pNew);
                if (RT_FAILURE(rc))
                    break;

                /* Swap in the new table atomically.  The swap is only done
                   when the current pointer is non-NULL; otherwise we wait and
                   retry.  NOTE(review): a NULL pIf->pDstTab presumably means
                   the table is temporarily taken by a sender - confirm. */
                for (;;)
                {
                    PINTNETDSTTAB pOld = pIf->pDstTab;
                    if (   pOld
                        && ASMAtomicCmpXchgPtr(&pIf->pDstTab, pNew, pOld))
                    {
                        RTMemFree(pOld);
                        break;
                    }
                    intnetR0BusyWait(pNetwork, &pIf->cBusy);
                }
            }

            /*
             * The trunk.
             */
            if (    RT_SUCCESS(rc)
                &&  pNetwork->MacTab.pTrunk)
            {
                /* apTaskDstTabs and apIntDstTabs are adjacent members, which
                   lets the loop below treat them as one array ending at
                   apIntDstTabs[cIntDstTabs]. */
                AssertCompileAdjacentMembers(INTNETTRUNKIF, apTaskDstTabs, apIntDstTabs);
                PINTNETTRUNKIF          pTrunk      = pNetwork->MacTab.pTrunk;
                PINTNETDSTTAB * const   ppEndDstTab = &pTrunk->apIntDstTabs[pTrunk->cIntDstTabs];
                for (PINTNETDSTTAB *ppDstTab = &pTrunk->apTaskDstTabs[0];
                     ppDstTab != ppEndDstTab && RT_SUCCESS(rc);
                     ppDstTab++)
                {
                    PINTNETDSTTAB pNew;
                    rc = intnetR0AllocDstTab(cAllocated, &pNew);
                    if (RT_FAILURE(rc))
                        break;

                    /* Same idea as for the interfaces, but the slot is
                       protected by the trunk's destination table spinlock.
                       The old table is freed after dropping the lock. */
                    for (;;)
                    {
                        RTSpinlockAcquire(pTrunk->hDstTabSpinlock);
                        void *pvOld = *ppDstTab;
                        if (pvOld)
                            *ppDstTab = pNew;
                        RTSpinlockRelease(pTrunk->hDstTabSpinlock);
                        if (pvOld)
                        {
                            RTMemFree(pvOld);
                            break;
                        }
                        intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
                    }
                }
            }

            /*
             * The MAC Address table itself.
             */
            if (RT_SUCCESS(rc))
            {
                PINTNETMACTABENTRY paNew = (PINTNETMACTABENTRY)RTMemAlloc(sizeof(INTNETMACTABENTRY) * cAllocated);
                if (paNew)
                {
                    RTSpinlockAcquire(pNetwork->hAddrSpinlock);

                    /* Copy the entries in use and neutralize the old ones.
                       Entries beyond cEntries in the new table are left
                       uninitialized. */
                    PINTNETMACTABENTRY  paOld = pTab->paEntries;
                    uint32_t            i     = pTab->cEntries;
                    while (i-- > 0)
                    {
                        paNew[i] = paOld[i];

                        paOld[i].fActive = false;
                        paOld[i].pIf     = NULL;
                    }

                    pTab->paEntries         = paNew;
                    pTab->cEntriesAllocated = cAllocated;

                    RTSpinlockRelease(pNetwork->hAddrSpinlock);

                    /* Free the old table outside the spinlock. */
                    RTMemFree(paOld);
                }
                else
                    rc = VERR_NO_MEMORY;
            }
        }
        else
            rc = VERR_OUT_OF_RANGE;
    }
    return rc;
}
2259
2260
2261
2262
2263#ifdef INTNET_WITH_DHCP_SNOOPING
2264
/**
 * Snoops IP assignments and releases from the DHCPv4 traffic.
 *
 * The caller is responsible for making sure this is traffic between the
 * BOOTPS and BOOTPC ports and for validating the IP header.  The UDP packet
 * need not be validated beyond the ports; it is validated here (including
 * the checksum) before the DHCP payload is looked at.
 *
 * Only DHCP ACK and RELEASE messages are acted upon:
 *      - ACK: The client address (ciaddr) is removed from the IPv4 address
 *        cache of every interface with a matching MAC address, and the
 *        assigned address (yiaddr) is added to the first match found.
 *      - RELEASE: Both ciaddr and yiaddr are removed from the IPv4 address
 *        cache of every interface with a matching MAC address.
 *
 * @param   pNetwork        The network this frame was seen on.
 * @param   pIpHdr          Pointer to a valid IP header.  This is for pseudo
 *                          header validation, so only the minimum header size
 *                          needs to be available and valid here.
 * @param   pUdpHdr         Pointer to the UDP header in the frame.
 * @param   cbUdpPkt        What's left of the frame when starting at the UDP header.
 */
static void intnetR0NetworkSnoopDhcp(PINTNETNETWORK pNetwork, PCRTNETIPV4 pIpHdr, PCRTNETUDP pUdpHdr, uint32_t cbUdpPkt)
{
    /*
     * Check if the DHCP message is valid and get the type.
     */
    if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbUdpPkt, true /*fCheckSum*/))
    {
        Log6(("Bad UDP packet\n"));
        return;
    }
    /* The BOOTP/DHCP message follows immediately after the UDP header. */
    PCRTNETBOOTP pDhcp = (PCRTNETBOOTP)(pUdpHdr + 1);
    uint8_t MsgType;
    if (!RTNetIPv4IsDHCPValid(pUdpHdr, pDhcp, cbUdpPkt - sizeof(*pUdpHdr), &MsgType))
    {
        Log6(("Bad DHCP packet\n"));
        return;
    }

#ifdef LOG_ENABLED
    /*
     * Log it.
     */
    const char *pszType = "unknown";
    switch (MsgType)
    {
        case RTNET_DHCP_MT_DISCOVER: pszType = "discover";  break;
        case RTNET_DHCP_MT_OFFER:    pszType = "offer"; break;
        case RTNET_DHCP_MT_REQUEST:  pszType = "request"; break;
        case RTNET_DHCP_MT_DECLINE:  pszType = "decline"; break;
        case RTNET_DHCP_MT_ACK:      pszType = "ack"; break;
        case RTNET_DHCP_MT_NAC:      pszType = "nac"; break;
        case RTNET_DHCP_MT_RELEASE:  pszType = "release"; break;
        case RTNET_DHCP_MT_INFORM:   pszType = "inform"; break;
    }
    Log6(("DHCP msg: %d (%s) client %.6Rhxs ciaddr=%d.%d.%d.%d yiaddr=%d.%d.%d.%d\n", MsgType, pszType, &pDhcp->bp_chaddr,
          pDhcp->bp_ciaddr.au8[0], pDhcp->bp_ciaddr.au8[1], pDhcp->bp_ciaddr.au8[2], pDhcp->bp_ciaddr.au8[3],
          pDhcp->bp_yiaddr.au8[0], pDhcp->bp_yiaddr.au8[1], pDhcp->bp_yiaddr.au8[2], pDhcp->bp_yiaddr.au8[3]));
#endif /* LOG_ENABLED */

    /*
     * Act upon the message.  Message types without a case are ignored.
     */
    switch (MsgType)
    {
#if 0
        case RTNET_DHCP_MT_REQUEST:
            /** @todo Check for valid non-broadcast requests w/ IP for any of the MACs we
             *        know, and add the IP to the cache. */
            break;
#endif


        /*
         * Lookup the interface by its MAC address and insert the IPv4 address into the cache.
         * Delete the old client address first, just in case it changed in a renewal.
         */
        case RTNET_DHCP_MT_ACK:
            if (intnetR0IPv4AddrIsGood(pDhcp->bp_yiaddr))
            {
                PINTNETIF pMatchingIf = NULL;
                RTSpinlockAcquire(pNetwork->hAddrSpinlock);

                uint32_t iIf = pNetwork->MacTab.cEntries;
                while (iIf-- > 0)
                {
                    PINTNETIF pCur = pNetwork->MacTab.paEntries[iIf].pIf;
                    if (    intnetR0IfHasMacAddr(pCur)
                        &&  !memcmp(&pCur->MacAddr, &pDhcp->bp_chaddr, sizeof(RTMAC)))
                    {
                        intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
                                                  (PCRTNETADDRU)&pDhcp->bp_ciaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_ACK");
                        /* Remember the first match only (the one with the
                           highest table index) and mark it busy so it can be
                           used after the spinlock is released. */
                        if (!pMatchingIf)
                        {
                            pMatchingIf = pCur;
                            intnetR0BusyIncIf(pMatchingIf);
                        }
                    }
                }

                RTSpinlockRelease(pNetwork->hAddrSpinlock);

                /* The add is done without holding the spinlock; the busy
                   reference taken above is dropped afterwards. */
                if (pMatchingIf)
                {
                    intnetR0IfAddrCacheAdd(pMatchingIf, kIntNetAddrType_IPv4,
                                           (PCRTNETADDRU)&pDhcp->bp_yiaddr, "DHCP_MT_ACK");
                    intnetR0BusyDecIf(pMatchingIf);
                }
            }
            return;


        /*
         * Lookup the interface by its MAC address and remove the IPv4 address(es) from the cache.
         */
        case RTNET_DHCP_MT_RELEASE:
        {
            RTSpinlockAcquire(pNetwork->hAddrSpinlock);

            uint32_t iIf = pNetwork->MacTab.cEntries;
            while (iIf-- > 0)
            {
                PINTNETIF pCur = pNetwork->MacTab.paEntries[iIf].pIf;
                if (    intnetR0IfHasMacAddr(pCur)
                    &&  !memcmp(&pCur->MacAddr, &pDhcp->bp_chaddr, sizeof(RTMAC)))
                {
                    intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
                                              (PCRTNETADDRU)&pDhcp->bp_ciaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_RELEASE");
                    intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
                                              (PCRTNETADDRU)&pDhcp->bp_yiaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_RELEASE");
                }
            }

            RTSpinlockRelease(pNetwork->hAddrSpinlock);
            break;
        }
    }

}
2398
2399
2400/**
2401 * Worker for intnetR0TrunkIfSnoopAddr that takes care of what
2402 * is likely to be a DHCP message.
2403 *
2404 * The caller has already check that the UDP source and destination ports
2405 * are BOOTPS or BOOTPC.
2406 *
2407 * @param pNetwork The network this frame was seen on.
2408 * @param pSG The gather list for the frame.
2409 */
2410static void intnetR0TrunkIfSnoopDhcp(PINTNETNETWORK pNetwork, PCINTNETSG pSG)
2411{
2412 /*
2413 * Get a pointer to a linear copy of the full packet, using the
2414 * temporary buffer if necessary.
2415 */
2416 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((PCRTNETETHERHDR)pSG->aSegs[0].pv + 1);
2417 uint32_t cbPacket = pSG->cbTotal - sizeof(RTNETETHERHDR);
2418 if (pSG->cSegsUsed > 1)
2419 {
2420 cbPacket = RT_MIN(cbPacket, INTNETNETWORK_TMP_SIZE);
2421 Log6(("intnetR0TrunkIfSnoopDhcp: Copying IPv4/UDP/DHCP pkt %u\n", cbPacket));
2422 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
2423 return;
2424 //pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
2425 pIpHdr = (PCRTNETIPV4)pNetwork->pbTmp;
2426 }
2427
2428 /*
2429 * Validate the IP header and find the UDP packet.
2430 */
2431 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, pSG->cbTotal - sizeof(RTNETETHERHDR), true /*fChecksum*/))
2432 {
2433 Log(("intnetR0TrunkIfSnoopDhcp: bad ip header\n"));
2434 return;
2435 }
2436 uint32_t cbIpHdr = pIpHdr->ip_hl * 4;
2437
2438 /*
2439 * Hand it over to the common DHCP snooper.
2440 */
2441 intnetR0NetworkSnoopDhcp(pNetwork, pIpHdr, (PCRTNETUDP)((uintptr_t)pIpHdr + cbIpHdr), cbPacket - cbIpHdr);
2442}
2443
2444#endif /* INTNET_WITH_DHCP_SNOOPING */
2445
2446
2447/**
2448 * Snoops up source addresses from ARP requests and purge these from the address
2449 * caches.
2450 *
2451 * The purpose of this purging is to get rid of stale addresses.
2452 *
2453 * @param pNetwork The network this frame was seen on.
2454 * @param pSG The gather list for the frame.
2455 */
2456static void intnetR0TrunkIfSnoopArp(PINTNETNETWORK pNetwork, PCINTNETSG pSG)
2457{
2458 /*
2459 * Check the minimum size first.
2460 */
2461 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4)))
2462 return;
2463
2464 /*
2465 * Copy to temporary buffer if necessary.
2466 */
2467 uint32_t cbPacket = RT_MIN(pSG->cbTotal, sizeof(RTNETARPIPV4));
2468 PCRTNETARPIPV4 pArpIPv4 = (PCRTNETARPIPV4)((uintptr_t)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
2469 if ( pSG->cSegsUsed != 1
2470 && pSG->aSegs[0].cb < cbPacket)
2471 {
2472 if ( (pSG->fFlags & (INTNETSG_FLAGS_ARP_IPV4 | INTNETSG_FLAGS_PKT_CP_IN_TMP))
2473 != (INTNETSG_FLAGS_ARP_IPV4 | INTNETSG_FLAGS_PKT_CP_IN_TMP)
2474 && !intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
2475 return;
2476 pArpIPv4 = (PCRTNETARPIPV4)pNetwork->pbTmp;
2477 }
2478
2479 /*
2480 * Ignore packets which doesn't interest us or we perceive as malformed.
2481 */
2482 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
2483 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
2484 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
2485 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
2486 return;
2487 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
2488 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
2489 && ar_oper != RTNET_ARPOP_REPLY))
2490 {
2491 Log6(("ts-ar: op=%#x\n", ar_oper));
2492 return;
2493 }
2494
2495 /*
2496 * Delete the source address if it's OK.
2497 */
2498 if ( !intnetR0IsMacAddrMulticast(&pArpIPv4->ar_sha)
2499 && ( pArpIPv4->ar_sha.au16[0]
2500 || pArpIPv4->ar_sha.au16[1]
2501 || pArpIPv4->ar_sha.au16[2])
2502 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_spa))
2503 {
2504 Log6(("ts-ar: %d.%d.%d.%d / %.6Rhxs\n", pArpIPv4->ar_spa.au8[0], pArpIPv4->ar_spa.au8[1],
2505 pArpIPv4->ar_spa.au8[2], pArpIPv4->ar_spa.au8[3], &pArpIPv4->ar_sha));
2506 intnetR0NetworkAddrCacheDelete(pNetwork, (PCRTNETADDRU)&pArpIPv4->ar_spa,
2507 kIntNetAddrType_IPv4, sizeof(pArpIPv4->ar_spa), "tif/arp");
2508 }
2509}
2510
2511
2512#ifdef INTNET_WITH_DHCP_SNOOPING
/**
 * Snoop up addresses from ARP and DHCP traffic from frames coming
 * over the trunk connection.
 *
 * The caller is responsible for doing some basic filtering before calling
 * this function.
 * For IPv4 this means checking against the minimum DHCPv4 frame size.
 *
 * IPv4 frames are only handed on to intnetR0TrunkIfSnoopDhcp when they are
 * UDP with one port being BOOTPS and the other BOOTPC.  ARP frames go
 * straight to intnetR0TrunkIfSnoopArp.  Other ethertypes are ignored.
 *
 * @param   pNetwork        The network.
 * @param   pSG             The SG list for the frame.
 * @param   EtherType       The Ethertype of the frame.
 */
static void intnetR0TrunkIfSnoopAddr(PINTNETNETWORK pNetwork, PCINTNETSG pSG, uint16_t EtherType)
{
    switch (EtherType)
    {
        case RTNET_ETHERTYPE_IPV4:
        {
            uint32_t    cbIpHdr;
            uint8_t     b;

            Assert(pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN);
            /* Fast path: the minimal IPv4 header is entirely within the first
               segment and can be accessed directly. */
            if (pSG->aSegs[0].cb >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN)
            {
                /* check if the protocol is UDP */
                PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((uint8_t const *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
                if (pIpHdr->ip_p != RTNETIPV4_PROT_UDP)
                    return;

                /* get the IPv4 header length */
                cbIpHdr = pIpHdr->ip_hl * 4;
            }
            /* Slow path: fetch the interesting bytes one by one from the
               gather list. */
            else
            {
                /* check if the protocol is UDP */
                if (    intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + RT_UOFFSETOF(RTNETIPV4, ip_p))
                    !=  RTNETIPV4_PROT_UDP)
                    return;

                /* get the IPv4 header length (low nibble of the first byte, in 32-bit units) */
                b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + 0); /* (IPv4 first byte, a bitfield) */
                cbIpHdr = (b & 0x0f) * 4;
            }
            if (cbIpHdr < RTNETIPV4_MIN_LEN)
                return;

            /* compare the ports. */
            if (pSG->aSegs[0].cb >= sizeof(RTNETETHERHDR) + cbIpHdr + RTNETUDP_MIN_LEN)
            {
                /* Fast path: proceed only if one of the ports is BOOTPS and
                   one of them is BOOTPC. */
                PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t const *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR) + cbIpHdr);
                if (    (   RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPS
                         && RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPS)
                    ||  (   RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPC
                         && RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPC))
                    return;
            }
            else
            {
                /* Slow path: the ports are big endian 16-bit values, so the
                   byte at offset +1 is the low byte and the byte at offset +0
                   the high byte, which must be zero for the BOOTP ports. */

                /* get the lower byte of the UDP source port number. */
                b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_UOFFSETOF(RTNETUDP, uh_sport) + 1);
                if (    b != RTNETIPV4_PORT_BOOTPS
                    &&  b != RTNETIPV4_PORT_BOOTPC)
                    return;
                uint8_t SrcPort = b;
                b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_UOFFSETOF(RTNETUDP, uh_sport));
                if (b)
                    return;

                /* get the lower byte of the UDP destination port number. */
                b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_UOFFSETOF(RTNETUDP, uh_dport) + 1);
                if (    b != RTNETIPV4_PORT_BOOTPS
                    &&  b != RTNETIPV4_PORT_BOOTPC)
                    return;
                /* The two ports must differ (one BOOTPS, the other BOOTPC). */
                if (b == SrcPort)
                    return;
                b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_UOFFSETOF(RTNETUDP, uh_dport));
                if (b)
                    return;
            }
            intnetR0TrunkIfSnoopDhcp(pNetwork, pSG);
            break;
        }

        case RTNET_ETHERTYPE_ARP:
            intnetR0TrunkIfSnoopArp(pNetwork, pSG);
            break;
    }
}
2601#endif /* INTNET_WITH_DHCP_SNOOPING */
2602
2603/**
2604 * Deals with an IPv6 packet.
2605 *
2606 * This will fish out the source IP address and add it to the cache.
2607 * Then it will look for DHCPRELEASE requests (?) and anything else
2608 * that we might find useful later.
2609 *
2610 * @param pIf The interface that's sending the frame.
2611 * @param pIpHdr Pointer to the IPv4 header in the frame.
2612 * @param cbPacket The size of the packet, or more correctly the
2613 * size of the frame without the ethernet header.
2614 * @param fGso Set if this is a GSO frame, clear if regular.
2615 */
2616static void intnetR0IfSnoopIPv6SourceAddr(PINTNETIF pIf, PCRTNETIPV6 pIpHdr, uint32_t cbPacket, bool fGso)
2617{
2618 NOREF(fGso);
2619
2620 /*
2621 * Check the header size first to prevent access invalid data.
2622 */
2623 if (cbPacket < RTNETIPV6_MIN_LEN)
2624 return;
2625
2626 /*
2627 * If the source address is good (not multicast) and
2628 * not already in the address cache of the sender, add it.
2629 */
2630 RTNETADDRU Addr;
2631 Addr.IPv6 = pIpHdr->ip6_src;
2632
2633 if ( intnetR0IPv6AddrIsGood(Addr.IPv6) && (pIpHdr->ip6_hlim == 0xff)
2634 && intnetR0IfAddrCacheLookupLikely(&pIf->aAddrCache[kIntNetAddrType_IPv6], &Addr, sizeof(Addr.IPv6)) < 0)
2635 {
2636 intnetR0IfAddrCacheAdd(pIf, kIntNetAddrType_IPv6, &Addr, "if/ipv6");
2637 }
2638}
2639
2640
2641/**
2642 * Deals with an IPv4 packet.
2643 *
2644 * This will fish out the source IP address and add it to the cache.
2645 * Then it will look for DHCPRELEASE requests (?) and anything else
2646 * that we might find useful later.
2647 *
2648 * @param pIf The interface that's sending the frame.
2649 * @param pIpHdr Pointer to the IPv4 header in the frame.
2650 * @param cbPacket The size of the packet, or more correctly the
2651 * size of the frame without the ethernet header.
2652 * @param fGso Set if this is a GSO frame, clear if regular.
2653 */
2654static void intnetR0IfSnoopIPv4SourceAddr(PINTNETIF pIf, PCRTNETIPV4 pIpHdr, uint32_t cbPacket, bool fGso)
2655{
2656 /*
2657 * Check the header size first to prevent access invalid data.
2658 */
2659 if (cbPacket < RTNETIPV4_MIN_LEN)
2660 return;
2661 uint32_t cbHdr = (uint32_t)pIpHdr->ip_hl * 4;
2662 if ( cbHdr < RTNETIPV4_MIN_LEN
2663 || cbPacket < cbHdr)
2664 return;
2665
2666 /*
2667 * If the source address is good (not broadcast or my network) and
2668 * not already in the address cache of the sender, add it. Validate
2669 * the IP header before adding it.
2670 */
2671 bool fValidatedIpHdr = false;
2672 RTNETADDRU Addr;
2673 Addr.IPv4 = pIpHdr->ip_src;
2674 if ( intnetR0IPv4AddrIsGood(Addr.IPv4)
2675 && intnetR0IfAddrCacheLookupLikely(&pIf->aAddrCache[kIntNetAddrType_IPv4], &Addr, sizeof(Addr.IPv4)) < 0)
2676 {
2677 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, cbPacket, !fGso /*fChecksum*/))
2678 {
2679 Log(("intnetR0IfSnoopIPv4SourceAddr: bad ip header\n"));
2680 return;
2681 }
2682
2683 intnetR0IfAddrCacheAddIt(pIf, kIntNetAddrType_IPv4, &Addr, "if/ipv4");
2684 fValidatedIpHdr = true;
2685 }
2686
2687#ifdef INTNET_WITH_DHCP_SNOOPING
2688 /*
2689 * Check for potential DHCP packets.
2690 */
2691 if ( pIpHdr->ip_p == RTNETIPV4_PROT_UDP /* DHCP is UDP. */
2692 && cbPacket >= cbHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN /* Min DHCP packet len. */
2693 && !fGso) /* GSO is not applicable to DHCP traffic. */
2694 {
2695 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t const *)pIpHdr + cbHdr);
2696 if ( ( RT_BE2H_U16(pUdpHdr->uh_dport) == RTNETIPV4_PORT_BOOTPS
2697 || RT_BE2H_U16(pUdpHdr->uh_sport) == RTNETIPV4_PORT_BOOTPS)
2698 && ( RT_BE2H_U16(pUdpHdr->uh_sport) == RTNETIPV4_PORT_BOOTPC
2699 || RT_BE2H_U16(pUdpHdr->uh_dport) == RTNETIPV4_PORT_BOOTPC))
2700 {
2701 if ( fValidatedIpHdr
2702 || RTNetIPv4IsHdrValid(pIpHdr, cbPacket, cbPacket, !fGso /*fChecksum*/))
2703 intnetR0NetworkSnoopDhcp(pIf->pNetwork, pIpHdr, pUdpHdr, cbPacket - cbHdr);
2704 else
2705 Log(("intnetR0IfSnoopIPv4SourceAddr: bad ip header (dhcp)\n"));
2706 }
2707 }
2708#endif /* INTNET_WITH_DHCP_SNOOPING */
2709}
2710
2711
2712/**
2713 * Snoop up source addresses from an ARP request or reply.
2714 *
2715 * @param pIf The interface that's sending the frame.
2716 * @param pHdr The ARP header.
2717 * @param cbPacket The size of the packet (might be larger than the ARP
2718 * request 'cause of min ethernet frame size).
2719 * @param pfSgFlags Pointer to the SG flags. This is used to tag the packet so we
2720 * don't have to repeat the frame parsing in intnetR0TrunkIfSend.
2721 */
2722static void intnetR0IfSnoopArpAddr(PINTNETIF pIf, PCRTNETARPIPV4 pArpIPv4, uint32_t cbPacket, uint16_t *pfSgFlags)
2723{
2724 /*
2725 * Ignore packets which doesn't interest us or we perceive as malformed.
2726 */
2727 if (RT_UNLIKELY(cbPacket < sizeof(RTNETARPIPV4)))
2728 return;
2729 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
2730 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
2731 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
2732 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
2733 return;
2734 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
2735 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
2736 && ar_oper != RTNET_ARPOP_REPLY))
2737 {
2738 Log6(("ar_oper=%#x\n", ar_oper));
2739 return;
2740 }
2741
2742 /*
2743 * Tag the SG as ARP IPv4 for later editing, then check for addresses
2744 * which can be removed or added to the address cache of the sender.
2745 */
2746 *pfSgFlags |= INTNETSG_FLAGS_ARP_IPV4;
2747
2748 if ( ar_oper == RTNET_ARPOP_REPLY
2749 && !intnetR0IsMacAddrMulticast(&pArpIPv4->ar_tha)
2750 && ( pArpIPv4->ar_tha.au16[0]
2751 || pArpIPv4->ar_tha.au16[1]
2752 || pArpIPv4->ar_tha.au16[2])
2753 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_tpa))
2754 intnetR0IfAddrCacheDelete(pIf, &pIf->aAddrCache[kIntNetAddrType_IPv4],
2755 (PCRTNETADDRU)&pArpIPv4->ar_tpa, sizeof(RTNETADDRIPV4), "if/arp");
2756
2757 if ( !memcmp(&pArpIPv4->ar_sha, &pIf->MacAddr, sizeof(RTMAC))
2758 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_spa))
2759 {
2760 intnetR0IfAddrCacheAdd(pIf, kIntNetAddrType_IPv4, (PCRTNETADDRU)&pArpIPv4->ar_spa, "if/arp");
2761 }
2762}
2763
2764
2765
2766/**
2767 * Checks packets send by a normal interface for new network
2768 * layer addresses.
2769 *
2770 * @param pIf The interface that's sending the frame.
2771 * @param pbFrame The frame.
2772 * @param cbFrame The size of the frame.
2773 * @param fGso Set if this is a GSO frame, clear if regular.
2774 * @param pfSgFlags Pointer to the SG flags. This is used to tag the packet so we
2775 * don't have to repeat the frame parsing in intnetR0TrunkIfSend.
2776 */
2777static void intnetR0IfSnoopAddr(PINTNETIF pIf, uint8_t const *pbFrame, uint32_t cbFrame, bool fGso, uint16_t *pfSgFlags)
2778{
2779 /*
2780 * Fish out the ethertype and look for stuff we can handle.
2781 */
2782 if (cbFrame <= sizeof(RTNETETHERHDR))
2783 return;
2784 cbFrame -= sizeof(RTNETETHERHDR);
2785
2786 uint16_t EtherType = RT_H2BE_U16(((PCRTNETETHERHDR)pbFrame)->EtherType);
2787 switch (EtherType)
2788 {
2789 case RTNET_ETHERTYPE_IPV4:
2790 intnetR0IfSnoopIPv4SourceAddr(pIf, (PCRTNETIPV4)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, fGso);
2791 break;
2792
2793 case RTNET_ETHERTYPE_IPV6:
2794 intnetR0IfSnoopIPv6SourceAddr(pIf, (PCRTNETIPV6)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, fGso);
2795 break;
2796
2797#if 0 /** @todo IntNet: implement IPX for wireless MAC sharing? */
2798 case RTNET_ETHERTYPE_IPX_1:
2799 case RTNET_ETHERTYPE_IPX_2:
2800 case RTNET_ETHERTYPE_IPX_3:
2801 intnetR0IfSnoopIpxSourceAddr(pIf, (PCINTNETIPX)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, pfSgFlags);
2802 break;
2803#endif
2804 case RTNET_ETHERTYPE_ARP:
2805 intnetR0IfSnoopArpAddr(pIf, (PCRTNETARPIPV4)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, pfSgFlags);
2806 break;
2807 }
2808}
2809
2810
2811/**
2812 * Writes a frame packet to the ring buffer.
2813 *
2814 * @returns VBox status code.
2815 * @param pBuf The buffer.
2816 * @param pRingBuf The ring buffer to read from.
2817 * @param pSG The gather list.
2818 * @param pNewDstMac Set the destination MAC address to the address if specified.
2819 */
2820static int intnetR0RingWriteFrame(PINTNETRINGBUF pRingBuf, PCINTNETSG pSG, PCRTMAC pNewDstMac)
2821{
2822 PINTNETHDR pHdr = NULL; /* shut up gcc*/
2823 void *pvDst = NULL; /* ditto */
2824 int rc;
2825 if (pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
2826 rc = IntNetRingAllocateFrame(pRingBuf, pSG->cbTotal, &pHdr, &pvDst);
2827 else
2828 rc = IntNetRingAllocateGsoFrame(pRingBuf, pSG->cbTotal, &pSG->GsoCtx, &pHdr, &pvDst);
2829 if (RT_SUCCESS(rc))
2830 {
2831 IntNetSgRead(pSG, pvDst);
2832 if (pNewDstMac)
2833 ((PRTNETETHERHDR)pvDst)->DstMac = *pNewDstMac;
2834
2835 IntNetRingCommitFrame(pRingBuf, pHdr);
2836 return VINF_SUCCESS;
2837 }
2838 return rc;
2839}
2840
2841
2842/**
2843 * Notifies consumers of incoming data from @a pIf that data is available.
2844 */
2845DECL_FORCE_INLINE(void) intnetR0IfNotifyRecv(PINTNETIF pIf)
2846{
2847#if !defined(VBOX_WITH_INTNET_SERVICE_IN_R3) || !defined(IN_RING3)
2848 RTSemEventSignal(pIf->hRecvEvent);
2849#else
2850 pIf->pfnRecvAvail(pIf->hIf, pIf->pvUserRecvAvail);
2851#endif
2852}
2853
2854
2855/**
2856 * Sends a frame to a specific interface.
2857 *
2858 * @param pIf The interface.
2859 * @param pIfSender The interface sending the frame. This is NULL if it's the trunk.
2860 * @param pSG The gather buffer which data is being sent to the interface.
2861 * @param pNewDstMac Set the destination MAC address to the address if specified.
2862 */
2863static void intnetR0IfSend(PINTNETIF pIf, PINTNETIF pIfSender, PINTNETSG pSG, PCRTMAC pNewDstMac)
2864{
2865 /*
2866 * Grab the receive/producer lock and copy over the frame.
2867 */
2868 RTSpinlockAcquire(pIf->hRecvInSpinlock);
2869 int rc = intnetR0RingWriteFrame(&pIf->pIntBuf->Recv, pSG, pNewDstMac);
2870 RTSpinlockRelease(pIf->hRecvInSpinlock);
2871 if (RT_SUCCESS(rc))
2872 {
2873 pIf->cYields = 0;
2874 intnetR0IfNotifyRecv(pIf);
2875 return;
2876 }
2877
2878 Log(("intnetR0IfSend: overflow cb=%d hIf=%RX32\n", pSG->cbTotal, pIf->hIf));
2879
2880 /*
2881 * Scheduling hack, for unicore machines primarily.
2882 */
2883 if ( pIf->fActive
2884 && pIf->cYields < 4 /* just twice */
2885 && pIfSender /* but not if it's from the trunk */
2886 && RTThreadPreemptIsEnabled(NIL_RTTHREAD)
2887 )
2888 {
2889 unsigned cYields = 2;
2890 while (--cYields > 0)
2891 {
2892 intnetR0IfNotifyRecv(pIf);
2893 RTThreadYield();
2894
2895 RTSpinlockAcquire(pIf->hRecvInSpinlock);
2896 rc = intnetR0RingWriteFrame(&pIf->pIntBuf->Recv, pSG, pNewDstMac);
2897 RTSpinlockRelease(pIf->hRecvInSpinlock);
2898 if (RT_SUCCESS(rc))
2899 {
2900 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatYieldsOk);
2901 intnetR0IfNotifyRecv(pIf);
2902 return;
2903 }
2904 pIf->cYields++;
2905 }
2906 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatYieldsNok);
2907 }
2908
2909 /* ok, the frame is lost. */
2910 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatLost);
2911 intnetR0IfNotifyRecv(pIf);
2912}
2913
2914
2915/**
2916 * Fallback path that does the GSO segmenting before passing the frame on to the
2917 * trunk interface.
2918 *
2919 * The caller holds the trunk lock.
2920 *
2921 * @param pThis The trunk.
2922 * @param pIfSender The IF sending the frame.
2923 * @param pSG Pointer to the gather list.
2924 * @param fDst The destination flags.
2925 */
2926static int intnetR0TrunkIfSendGsoFallback(PINTNETTRUNKIF pThis, PINTNETIF pIfSender, PINTNETSG pSG, uint32_t fDst)
2927{
2928 /*
2929 * Since we're only using this for GSO frame coming from the internal
2930 * network interfaces and never the trunk, we can assume there is only
2931 * one segment. This simplifies the code quite a bit.
2932 */
2933 Assert(PDMNetGsoIsValid(&pSG->GsoCtx, sizeof(pSG->GsoCtx), pSG->cbTotal));
2934 AssertReturn(pSG->cSegsUsed == 1, VERR_INTERNAL_ERROR_4);
2935
2936 union
2937 {
2938 uint8_t abBuf[sizeof(INTNETSG) + sizeof(INTNETSEG)];
2939 INTNETSG SG;
2940 } u;
2941
2942 /** @todo We have to adjust MSS so it does not exceed the value configured for
2943 * the host's interface.
2944 */
2945
2946 /*
2947 * Carve out the frame segments with the header and frame in different
2948 * scatter / gather segments.
2949 */
2950 uint32_t const cSegs = PDMNetGsoCalcSegmentCount(&pSG->GsoCtx, pSG->cbTotal);
2951 for (uint32_t iSeg = 0; iSeg < cSegs; iSeg++)
2952 {
2953 uint32_t cbSegPayload, cbSegHdrs;
2954 uint32_t offSegPayload = PDMNetGsoCarveSegment(&pSG->GsoCtx, (uint8_t *)pSG->aSegs[0].pv, pSG->cbTotal, iSeg, cSegs,
2955 pIfSender->abGsoHdrs, &cbSegHdrs, &cbSegPayload);
2956
2957 IntNetSgInitTempSegs(&u.SG, cbSegHdrs + cbSegPayload, 2, 2);
2958 u.SG.aSegs[0].Phys = NIL_RTHCPHYS;
2959 u.SG.aSegs[0].pv = pIfSender->abGsoHdrs;
2960 u.SG.aSegs[0].cb = cbSegHdrs;
2961 u.SG.aSegs[1].Phys = NIL_RTHCPHYS;
2962 u.SG.aSegs[1].pv = (uint8_t *)pSG->aSegs[0].pv + offSegPayload;
2963 u.SG.aSegs[1].cb = (uint32_t)cbSegPayload;
2964
2965 int rc = pThis->pIfPort->pfnXmit(pThis->pIfPort, pIfSender->pvIfData, &u.SG, fDst);
2966 if (RT_FAILURE(rc))
2967 return rc;
2968 }
2969 return VINF_SUCCESS;
2970}
2971
2972
2973/**
2974 * Checks if any of the given trunk destinations can handle this kind of GSO SG.
2975 *
2976 * @returns true if it can, false if it cannot.
2977 * @param pThis The trunk.
2978 * @param pSG The scatter / gather buffer.
2979 * @param fDst The destination mask.
2980 */
2981DECLINLINE(bool) intnetR0TrunkIfCanHandleGsoFrame(PINTNETTRUNKIF pThis, PINTNETSG pSG, uint32_t fDst)
2982{
2983 uint8_t u8Type = pSG->GsoCtx.u8Type;
2984 AssertReturn(u8Type < 32, false); /* paranoia */
2985 uint32_t fMask = RT_BIT_32(u8Type);
2986
2987 if (fDst == INTNETTRUNKDIR_HOST)
2988 return !!(pThis->fHostGsoCapabilites & fMask);
2989 if (fDst == INTNETTRUNKDIR_WIRE)
2990 return !!(pThis->fWireGsoCapabilites & fMask);
2991 Assert(fDst == (INTNETTRUNKDIR_WIRE | INTNETTRUNKDIR_HOST));
2992 return !!(pThis->fHostGsoCapabilites & pThis->fWireGsoCapabilites & fMask);
2993}
2994
2995
2996/**
2997 * Calculates the checksum of a full ipv6 frame.
2998 *
2999 * @returns 16-bit hecksum value.
3000 * @param pIpHdr The IPv6 header (network endian (big)).
3001 * @param bProtocol The protocol number. This can be the same as the
3002 * ip6_nxt field, but doesn't need to be.
3003 * @param cbPkt The packet size (host endian of course). This can
3004 * be the same as the ip6_plen field, but as with @a
3005 * bProtocol it won't be when extension headers are
3006 * present. For UDP this will be uh_ulen converted to
3007 * host endian.
3008 */
3009static uint16_t computeIPv6FullChecksum(PCRTNETIPV6 pIpHdr)
3010{
3011 uint16_t const *data;
3012 int len = RT_BE2H_U16(pIpHdr->ip6_plen);
3013 uint32_t sum = RTNetIPv6PseudoChecksum(pIpHdr);
3014
3015 /* add the payload */
3016 data = (uint16_t *) (pIpHdr + 1);
3017 while(len > 1)
3018 {
3019 sum += *(data);
3020 data++;
3021 len -= 2;
3022 }
3023
3024 if(len > 0)
3025 sum += *((uint8_t *) data);
3026
3027 while(sum >> 16)
3028 sum = (sum & 0xffff) + (sum >> 16);
3029
3030 return (uint16_t) ~sum;
3031}
3032
3033
3034/**
3035 * Rewrite VM MAC address with shared host MAC address inside IPv6
3036 * Neighbor Discovery datagrams.
3037 */
3038static void intnetR0TrunkSharedMacEditIPv6FromIntNet(PINTNETTRUNKIF pThis, PINTNETIF pIfSender,
3039 PRTNETETHERHDR pEthHdr, uint32_t cb)
3040{
3041 if (RT_UNLIKELY(cb < sizeof(*pEthHdr)))
3042 return;
3043
3044 /* have IPv6 header */
3045 PRTNETIPV6 pIPv6 = (PRTNETIPV6)(pEthHdr + 1);
3046 cb -= sizeof(*pEthHdr);
3047 if (RT_UNLIKELY(cb < sizeof(*pIPv6)))
3048 return;
3049
3050 if ( pIPv6->ip6_nxt != RTNETIPV6_PROT_ICMPV6
3051 || pIPv6->ip6_hlim != 0xff)
3052 return;
3053
3054 PRTNETICMPV6HDR pICMPv6 = (PRTNETICMPV6HDR)(pIPv6 + 1);
3055 cb -= sizeof(*pIPv6);
3056 if (RT_UNLIKELY(cb < sizeof(*pICMPv6)))
3057 return;
3058
3059 uint32_t hdrlen = 0;
3060 uint8_t llaopt = RTNETIPV6_ICMP_ND_SLLA_OPT;
3061
3062 uint8_t type = pICMPv6->icmp6_type;
3063 switch (type)
3064 {
3065 case RTNETIPV6_ICMP_TYPE_RS:
3066 hdrlen = 8;
3067 break;
3068
3069 case RTNETIPV6_ICMP_TYPE_RA:
3070 hdrlen = 16;
3071 break;
3072
3073 case RTNETIPV6_ICMP_TYPE_NS:
3074 hdrlen = 24;
3075 break;
3076
3077 case RTNETIPV6_ICMP_TYPE_NA:
3078 hdrlen = 24;
3079 llaopt = RTNETIPV6_ICMP_ND_TLLA_OPT;
3080 break;
3081
3082 default:
3083 return;
3084 }
3085
3086 AssertReturnVoid(hdrlen > 0);
3087 if (RT_UNLIKELY(cb < hdrlen))
3088 return;
3089
3090 if (RT_UNLIKELY(pICMPv6->icmp6_code != 0))
3091 return;
3092
3093 PRTNETNDP_LLA_OPT pLLAOpt = NULL;
3094 char *pOpt = (char *)pICMPv6 + hdrlen;
3095 cb -= hdrlen;
3096
3097 while (cb >= 8)
3098 {
3099 uint8_t opt = ((uint8_t *)pOpt)[0];
3100 uint32_t optlen = (uint32_t)((uint8_t *)pOpt)[1] * 8;
3101 if (RT_UNLIKELY(cb < optlen))
3102 return;
3103
3104 if (opt == llaopt)
3105 {
3106 if (RT_UNLIKELY(optlen != 8))
3107 return;
3108 pLLAOpt = (PRTNETNDP_LLA_OPT)pOpt;
3109 break;
3110 }
3111
3112 pOpt += optlen;
3113 cb -= optlen;
3114 }
3115
3116 if (pLLAOpt == NULL)
3117 return;
3118
3119 if (memcmp(&pLLAOpt->lla, &pIfSender->MacAddr, sizeof(RTMAC)) != 0)
3120 return;
3121
3122 /* overwrite VM's MAC with host's MAC */
3123 pLLAOpt->lla = pThis->MacAddr;
3124
3125 /* recompute the checksum */
3126 pICMPv6->icmp6_cksum = 0;
3127 pICMPv6->icmp6_cksum = computeIPv6FullChecksum(pIPv6);
3128}
3129
3130
3131/**
3132 * Sends a frame down the trunk.
3133 *
3134 * @param pThis The trunk.
3135 * @param pNetwork The network the frame is being sent to.
3136 * @param pIfSender The IF sending the frame. Used for MAC address
3137 * checks in shared MAC mode.
3138 * @param fDst The destination flags.
3139 * @param pSG Pointer to the gather list.
3140 */
3141static void intnetR0TrunkIfSend(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork, PINTNETIF pIfSender,
3142 uint32_t fDst, PINTNETSG pSG)
3143{
3144 /*
3145 * Quick sanity check.
3146 */
3147 AssertPtr(pThis);
3148 AssertPtr(pNetwork);
3149 AssertPtr(pIfSender);
3150 AssertPtr(pSG);
3151 Assert(fDst);
3152 AssertReturnVoid(pThis->pIfPort);
3153
3154 /*
3155 * Edit the frame if we're sharing the MAC address with the host on the wire.
3156 *
3157 * If the frame is headed for both the host and the wire, we'll have to send
3158 * it to the host before making any modifications, and force the OS specific
3159 * backend to copy it. We do this by marking it as TEMP (which is always the
3160 * case right now).
3161 */
3162 if ( (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
3163 && (fDst & INTNETTRUNKDIR_WIRE))
3164 {
3165 /*
3166 * Dispatch it to the host before making changes.
3167 */
3168 if (fDst & INTNETTRUNKDIR_HOST)
3169 {
3170 Assert(pSG->fFlags & INTNETSG_FLAGS_TEMP); /* make sure copy is forced */
3171 intnetR0TrunkIfSend(pThis, pNetwork, pIfSender, INTNETTRUNKDIR_HOST, pSG);
3172 fDst &= ~INTNETTRUNKDIR_HOST;
3173 }
3174
3175 /*
3176 * Edit the source address so that it it's the same as the host.
3177 */
3178 /* ASSUME frame from IntNetR0IfSend! */
3179 AssertReturnVoid(pSG->cSegsUsed == 1);
3180 AssertReturnVoid(pSG->cbTotal >= sizeof(RTNETETHERHDR));
3181 AssertReturnVoid(pIfSender);
3182 PRTNETETHERHDR pEthHdr = (PRTNETETHERHDR)pSG->aSegs[0].pv;
3183
3184 pEthHdr->SrcMac = pThis->MacAddr;
3185
3186 /*
3187 * Deal with tags from the snooping phase.
3188 */
3189 if (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4)
3190 {
3191 /*
3192 * APR IPv4: replace hardware (MAC) addresses because these end up
3193 * in ARP caches. So, if we don't the other machines will
3194 * send the packets to the MAC address of the guest
3195 * instead of the one of the host, which won't work on
3196 * wireless of course...
3197 */
3198 PRTNETARPIPV4 pArp = (PRTNETARPIPV4)(pEthHdr + 1);
3199 if (!memcmp(&pArp->ar_sha, &pIfSender->MacAddr, sizeof(RTMAC)))
3200 {
3201 Log6(("tw: ar_sha %.6Rhxs -> %.6Rhxs\n", &pArp->ar_sha, &pThis->MacAddr));
3202 pArp->ar_sha = pThis->MacAddr;
3203 }
3204 if (!memcmp(&pArp->ar_tha, &pIfSender->MacAddr, sizeof(RTMAC))) /* just in case... */
3205 {
3206 Log6(("tw: ar_tha %.6Rhxs -> %.6Rhxs\n", &pArp->ar_tha, &pThis->MacAddr));
3207 pArp->ar_tha = pThis->MacAddr;
3208 }
3209 }
3210 else if (pEthHdr->EtherType == RT_H2N_U16_C(RTNET_ETHERTYPE_IPV6))
3211 {
3212 intnetR0TrunkSharedMacEditIPv6FromIntNet(pThis, pIfSender, pEthHdr, pSG->cbTotal);
3213 }
3214 }
3215
3216 /*
3217 * Send the frame, handling the GSO fallback.
3218 *
3219 * Note! The trunk implementation will re-check that the trunk is active
3220 * before sending, so we don't have to duplicate that effort here.
3221 */
3222 STAM_REL_PROFILE_START(&pIfSender->pIntBuf->StatSend2, a);
3223 int rc;
3224 if ( pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID
3225 || intnetR0TrunkIfCanHandleGsoFrame(pThis, pSG, fDst) )
3226 rc = pThis->pIfPort->pfnXmit(pThis->pIfPort, pIfSender->pvIfData, pSG, fDst);
3227 else
3228 rc = intnetR0TrunkIfSendGsoFallback(pThis, pIfSender, pSG, fDst);
3229 STAM_REL_PROFILE_STOP(&pIfSender->pIntBuf->StatSend2, a);
3230
3231 /** @todo failure statistics? */
3232 Log2(("intnetR0TrunkIfSend: %Rrc fDst=%d\n", rc, fDst)); NOREF(rc);
3233}
3234
3235
3236/**
3237 * Detect broadcasts packaged as unicast and convert them back to broadcast.
3238 *
3239 * WiFi routers try to use ethernet unicast instead of broadcast or
3240 * multicast when possible. Look inside the packet and fix up
3241 * ethernet destination to be proper broadcast or multicast if
3242 * necessary.
3243 *
3244 * @returns true broadcast (pEthHdr & pSG are modified), false if not.
3245 * @param pNetwork The network the frame is being sent to.
3246 * @param pSG Pointer to the gather list for the frame. The
3247 * ethernet destination address is modified when
3248 * returning true.
3249 * @param pEthHdr Pointer to the ethernet header. The ethernet
3250 * destination address is modified when returning true.
3251 */
3252static bool intnetR0NetworkSharedMacDetectAndFixBroadcast(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3253{
3254 NOREF(pNetwork);
3255
3256 switch (pEthHdr->EtherType)
3257 {
3258 case RT_H2N_U16_C(RTNET_ETHERTYPE_ARP):
3259 {
3260 uint16_t ar_oper;
3261 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_UOFFSETOF(RTNETARPHDR, ar_oper),
3262 sizeof(ar_oper), &ar_oper))
3263 return false;
3264
3265 if (ar_oper == RT_H2N_U16_C(RTNET_ARPOP_REQUEST))
3266 {
3267 /* change to broadcast */
3268 pEthHdr->DstMac.au16[0] = 0xffff;
3269 pEthHdr->DstMac.au16[1] = 0xffff;
3270 pEthHdr->DstMac.au16[2] = 0xffff;
3271 }
3272 else
3273 return false;
3274 break;
3275 }
3276
3277 case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV4):
3278 {
3279 RTNETADDRIPV4 ip_dst;
3280 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_UOFFSETOF(RTNETIPV4, ip_dst),
3281 sizeof(ip_dst), &ip_dst))
3282 return false;
3283
3284 if (ip_dst.u == 0xffffffff) /* 255.255.255.255? */
3285 {
3286 /* change to broadcast */
3287 pEthHdr->DstMac.au16[0] = 0xffff;
3288 pEthHdr->DstMac.au16[1] = 0xffff;
3289 pEthHdr->DstMac.au16[2] = 0xffff;
3290 }
3291 else if ((ip_dst.au8[0] & 0xf0) == 0xe0) /* IPv4 multicast? */
3292 {
3293 /* change to 01:00:5e:xx:xx:xx multicast ... */
3294 pEthHdr->DstMac.au8[0] = 0x01;
3295 pEthHdr->DstMac.au8[1] = 0x00;
3296 pEthHdr->DstMac.au8[2] = 0x5e;
3297 /* ... with lower 23 bits from the multicast IP address */
3298 pEthHdr->DstMac.au8[3] = ip_dst.au8[1] & 0x7f;
3299 pEthHdr->DstMac.au8[4] = ip_dst.au8[2];
3300 pEthHdr->DstMac.au8[5] = ip_dst.au8[3];
3301 }
3302 else
3303 return false;
3304 break;
3305 }
3306
3307 case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV6):
3308 {
3309 RTNETADDRIPV6 ip6_dst;
3310 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_UOFFSETOF(RTNETIPV6, ip6_dst),
3311 sizeof(ip6_dst), &ip6_dst))
3312 return false;
3313
3314 if (ip6_dst.au8[0] == 0xff) /* IPv6 multicast? */
3315 {
3316 pEthHdr->DstMac.au16[0] = 0x3333;
3317 pEthHdr->DstMac.au16[1] = ip6_dst.au16[6];
3318 pEthHdr->DstMac.au16[2] = ip6_dst.au16[7];
3319 }
3320 else
3321 return false;
3322 break;
3323 }
3324
3325 default:
3326 return false;
3327 }
3328
3329
3330 /*
3331 * Update ethernet destination in the segment.
3332 */
3333 intnetR0SgWritePart(pSG, RT_UOFFSETOF(RTNETETHERHDR, DstMac), sizeof(pEthHdr->DstMac), &pEthHdr->DstMac);
3334
3335 return true;
3336}
3337
3338
3339/**
3340 * Snoops a multicast ICMPv6 ND DAD from the wire via the trunk connection.
3341 *
3342 * @param pNetwork The network the frame is being sent to.
3343 * @param pSG Pointer to the gather list for the frame.
3344 * @param pEthHdr Pointer to the ethernet header.
3345 */
3346static void intnetR0NetworkSnoopNAFromWire(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3347{
3348 NOREF(pEthHdr);
3349
3350 /*
3351 * Check the minimum size and get a linear copy of the thing to work on,
3352 * using the temporary buffer if necessary.
3353 */
3354 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETIPV6) +
3355 sizeof(RTNETNDP)))
3356 return;
3357 PRTNETIPV6 pIPv6 = (PRTNETIPV6)((uint8_t *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
3358 if ( pSG->cSegsUsed != 1
3359 && pSG->aSegs[0].cb < sizeof(RTNETETHERHDR) + sizeof(RTNETIPV6) +
3360 sizeof(RTNETNDP))
3361 {
3362 Log6(("fw: Copying IPv6 pkt %u\n", sizeof(RTNETIPV6)));
3363 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), sizeof(RTNETIPV6)
3364 + sizeof(RTNETNDP), pNetwork->pbTmp))
3365 return;
3366 pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3367 pIPv6 = (PRTNETIPV6)pNetwork->pbTmp;
3368 }
3369
3370 PCRTNETNDP pNd = (PCRTNETNDP) (pIPv6 + 1);
3371
3372 /*
3373 * a multicast NS with :: as source address means a DAD packet.
3374 * if it comes from the wire and we have the DAD'd address in our cache,
3375 * flush the entry as the address is being acquired by someone else on
3376 * the network.
3377 */
3378 if ( pIPv6->ip6_hlim == 0xff
3379 && pIPv6->ip6_nxt == RTNETIPV6_PROT_ICMPV6
3380 && pNd->Hdr.icmp6_type == RTNETIPV6_ICMP_TYPE_NS
3381 && pNd->Hdr.icmp6_code == 0
3382 && pIPv6->ip6_src.QWords.qw0 == 0
3383 && pIPv6->ip6_src.QWords.qw1 == 0)
3384 {
3385
3386 intnetR0NetworkAddrCacheDelete(pNetwork, (PCRTNETADDRU) &pNd->target_address,
3387 kIntNetAddrType_IPv6, sizeof(RTNETADDRIPV6), "tif/ip6");
3388 }
3389}
3390/**
3391 * Edits an ARP packet arriving from the wire via the trunk connection.
3392 *
3393 * @param pNetwork The network the frame is being sent to.
3394 * @param pSG Pointer to the gather list for the frame.
3395 * The flags and data content may be updated.
3396 * @param pEthHdr Pointer to the ethernet header. This may also be
3397 * updated if it's a unicast...
3398 */
3399static void intnetR0NetworkEditArpFromWire(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3400{
3401 /*
3402 * Check the minimum size and get a linear copy of the thing to work on,
3403 * using the temporary buffer if necessary.
3404 */
3405 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4)))
3406 return;
3407 PRTNETARPIPV4 pArpIPv4 = (PRTNETARPIPV4)((uint8_t *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
3408 if ( pSG->cSegsUsed != 1
3409 && pSG->aSegs[0].cb < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4))
3410 {
3411 Log6(("fw: Copying ARP pkt %u\n", sizeof(RTNETARPIPV4)));
3412 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), sizeof(RTNETARPIPV4), pNetwork->pbTmp))
3413 return;
3414 pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3415 pArpIPv4 = (PRTNETARPIPV4)pNetwork->pbTmp;
3416 }
3417
3418 /*
3419 * Ignore packets which doesn't interest us or we perceive as malformed.
3420 */
3421 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
3422 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
3423 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
3424 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
3425 return;
3426 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
3427 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
3428 && ar_oper != RTNET_ARPOP_REPLY))
3429 {
3430 Log6(("ar_oper=%#x\n", ar_oper));
3431 return;
3432 }
3433
3434 /* Tag it as ARP IPv4. */
3435 pSG->fFlags |= INTNETSG_FLAGS_ARP_IPV4;
3436
3437 /*
3438 * The thing we're interested in here is a reply to a query made by a guest
3439 * since we modified the MAC in the initial request the guest made.
3440 */
3441 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
3442 RTMAC MacAddrTrunk;
3443 if (pNetwork->MacTab.pTrunk)
3444 MacAddrTrunk = pNetwork->MacTab.pTrunk->MacAddr;
3445 else
3446 memset(&MacAddrTrunk, 0, sizeof(MacAddrTrunk));
3447 RTSpinlockRelease(pNetwork->hAddrSpinlock);
3448 if ( ar_oper == RTNET_ARPOP_REPLY
3449 && !memcmp(&pArpIPv4->ar_tha, &MacAddrTrunk, sizeof(RTMAC)))
3450 {
3451 PINTNETIF pIf = intnetR0NetworkAddrCacheLookupIf(pNetwork, (PCRTNETADDRU)&pArpIPv4->ar_tpa,
3452 kIntNetAddrType_IPv4, sizeof(pArpIPv4->ar_tpa));
3453 if (pIf)
3454 {
3455 Log6(("fw: ar_tha %.6Rhxs -> %.6Rhxs\n", &pArpIPv4->ar_tha, &pIf->MacAddr));
3456 pArpIPv4->ar_tha = pIf->MacAddr;
3457 if (!memcmp(&pEthHdr->DstMac, &MacAddrTrunk, sizeof(RTMAC)))
3458 {
3459 Log6(("fw: DstMac %.6Rhxs -> %.6Rhxs\n", &pEthHdr->DstMac, &pIf->MacAddr));
3460 pEthHdr->DstMac = pIf->MacAddr;
3461 if ((void *)pEthHdr != pSG->aSegs[0].pv)
3462 intnetR0SgWritePart(pSG, RT_UOFFSETOF(RTNETETHERHDR, DstMac), sizeof(RTMAC), &pIf->MacAddr);
3463 }
3464 intnetR0BusyDecIf(pIf);
3465
3466 /* Write back the packet if we've been making changes to a buffered copy. */
3467 if (pSG->fFlags & INTNETSG_FLAGS_PKT_CP_IN_TMP)
3468 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR), sizeof(PRTNETARPIPV4), pArpIPv4);
3469 }
3470 }
3471}
3472
3473
3474/**
3475 * Detects and edits an DHCP packet arriving from the internal net.
3476 *
3477 * @param pNetwork The network the frame is being sent to.
3478 * @param pSG Pointer to the gather list for the frame.
3479 * The flags and data content may be updated.
3480 * @param pEthHdr Pointer to the ethernet header. This may also be
3481 * updated if it's a unicast...
3482 */
3483static void intnetR0NetworkEditDhcpFromIntNet(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3484{
3485 NOREF(pEthHdr);
3486
3487 /*
3488 * Check the minimum size and get a linear copy of the thing to work on,
3489 * using the temporary buffer if necessary.
3490 */
3491 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN))
3492 return;
3493 /*
3494 * Get a pointer to a linear copy of the full packet, using the
3495 * temporary buffer if necessary.
3496 */
3497 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((PCRTNETETHERHDR)pSG->aSegs[0].pv + 1);
3498 uint32_t cbPacket = pSG->cbTotal - sizeof(RTNETETHERHDR);
3499 if (pSG->cSegsUsed > 1)
3500 {
3501 cbPacket = RT_MIN(cbPacket, INTNETNETWORK_TMP_SIZE);
3502 Log6(("intnetR0NetworkEditDhcpFromIntNet: Copying IPv4/UDP/DHCP pkt %u\n", cbPacket));
3503 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
3504 return;
3505 //pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3506 pIpHdr = (PCRTNETIPV4)pNetwork->pbTmp;
3507 }
3508
3509 /*
3510 * Validate the IP header and find the UDP packet.
3511 */
3512 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, pSG->cbTotal - sizeof(RTNETETHERHDR), true /*fCheckSum*/))
3513 {
3514 Log6(("intnetR0NetworkEditDhcpFromIntNet: bad ip header\n"));
3515 return;
3516 }
3517 size_t cbIpHdr = pIpHdr->ip_hl * 4;
3518 if ( pIpHdr->ip_p != RTNETIPV4_PROT_UDP /* DHCP is UDP. */
3519 || cbPacket < cbIpHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN) /* Min DHCP packet len */
3520 return;
3521
3522 size_t cbUdpPkt = cbPacket - cbIpHdr;
3523 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uintptr_t)pIpHdr + cbIpHdr);
3524 /* We are only interested in DHCP packets coming from client to server. */
3525 if ( RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPS
3526 || RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPC)
3527 return;
3528
3529 /*
3530 * Check if the DHCP message is valid and get the type.
3531 */
3532 if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbUdpPkt, true /*fCheckSum*/))
3533 {
3534 Log6(("intnetR0NetworkEditDhcpFromIntNet: Bad UDP packet\n"));
3535 return;
3536 }
3537 PCRTNETBOOTP pDhcp = (PCRTNETBOOTP)(pUdpHdr + 1);
3538 uint8_t bMsgType;
3539 if (!RTNetIPv4IsDHCPValid(pUdpHdr, pDhcp, cbUdpPkt - sizeof(*pUdpHdr), &bMsgType))
3540 {
3541 Log6(("intnetR0NetworkEditDhcpFromIntNet: Bad DHCP packet\n"));
3542 return;
3543 }
3544
3545 switch (bMsgType)
3546 {
3547 case RTNET_DHCP_MT_DISCOVER:
3548 case RTNET_DHCP_MT_REQUEST:
3549 /*
3550 * Must set the broadcast flag or we won't catch the respons.
3551 */
3552 if (!(pDhcp->bp_flags & RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST)))
3553 {
3554 Log6(("intnetR0NetworkEditDhcpFromIntNet: Setting broadcast flag in DHCP %#x, previously %x\n",
3555 bMsgType, pDhcp->bp_flags));
3556
3557 /* Patch flags */
3558 uint16_t uFlags = pDhcp->bp_flags | RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST);
3559 intnetR0SgWritePart(pSG, (uintptr_t)&pDhcp->bp_flags - (uintptr_t)pIpHdr + sizeof(RTNETETHERHDR), sizeof(uFlags), &uFlags);
3560
3561 /* Patch UDP checksum */
3562 if (pUdpHdr->uh_sum != 0)
3563 {
3564 uint32_t uChecksum = (uint32_t)~pUdpHdr->uh_sum + RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST);
3565 while (uChecksum >> 16)
3566 uChecksum = (uChecksum >> 16) + (uChecksum & 0xFFFF);
3567 uChecksum = ~uChecksum;
3568 intnetR0SgWritePart(pSG,
3569 (uintptr_t)&pUdpHdr->uh_sum - (uintptr_t)pIpHdr + sizeof(RTNETETHERHDR),
3570 sizeof(pUdpHdr->uh_sum),
3571 &uChecksum);
3572 }
3573 }
3574
3575#ifdef RT_OS_DARWIN
3576 /*
3577 * Work around little endian checksum issue in mac os x 10.7.0 GM.
3578 */
3579 if ( pIpHdr->ip_tos
3580 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_WORKAROUND_1))
3581 {
3582 /* Patch it. */
3583 uint8_t uTos = pIpHdr->ip_tos;
3584 uint8_t uZero = 0;
3585 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR) + 1, sizeof(uZero), &uZero);
3586
3587 /* Patch the IP header checksum. */
3588 uint32_t uChecksum = (uint32_t)~pIpHdr->ip_sum - (uTos << 8);
3589 while (uChecksum >> 16)
3590 uChecksum = (uChecksum >> 16) + (uChecksum & 0xFFFF);
3591 uChecksum = ~uChecksum;
3592
3593 Log(("intnetR0NetworkEditDhcpFromIntNet: cleared ip_tos (was %#04x); ip_sum=%#06x -> %#06x\n",
3594 uTos, RT_BE2H_U16(pIpHdr->ip_sum), RT_BE2H_U16(uChecksum) ));
3595 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR) + RT_UOFFSETOF(RTNETIPV4, ip_sum),
3596 sizeof(pIpHdr->ip_sum), &uChecksum);
3597 }
3598#endif
3599 break;
3600 }
3601}
3602
3603
3604/**
3605 * Checks if the callers context is okay for sending to the specified
3606 * destinations.
3607 *
3608 * @returns true if it's okay, false if it isn't.
3609 * @param pNetwork The network.
3610 * @param pIfSender The interface sending or NULL if it's the trunk.
3611 * @param pDstTab The destination table.
3612 */
3613DECLINLINE(bool) intnetR0NetworkIsContextOk(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, PCINTNETDSTTAB pDstTab)
3614{
3615 NOREF(pNetwork);
3616
3617 /* Sending to the trunk is the problematic path. If the trunk is the
3618 sender we won't be sending to it, so no problem..
3619 Note! fTrunkDst may be set event if if the trunk is the sender. */
3620 if (!pIfSender)
3621 return true;
3622
3623 uint32_t const fTrunkDst = pDstTab->fTrunkDst;
3624 if (!fTrunkDst)
3625 return true;
3626
3627 /* ASSUMES: that the trunk won't change its report while we're checking. */
3628 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3629 if (pTrunk && (fTrunkDst & pTrunk->fNoPreemptDsts) == fTrunkDst)
3630 return true;
3631
3632 /* ASSUMES: That a preemption test detects HM contexts. (Will work on
3633 non-preemptive systems as well.) */
3634 if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
3635 return true;
3636 return false;
3637}
3638
3639
3640/**
3641 * Checks if the callers context is okay for doing a broadcast given the
3642 * specified source.
3643 *
3644 * @returns true if it's okay, false if it isn't.
3645 * @param pNetwork The network.
3646 * @param fSrc The source of the packet. (0 (intnet),
3647 * INTNETTRUNKDIR_HOST or INTNETTRUNKDIR_WIRE).
3648 */
3649DECLINLINE(bool) intnetR0NetworkIsContextOkForBroadcast(PINTNETNETWORK pNetwork, uint32_t fSrc)
3650{
3651 /* Sending to the trunk is the problematic path. If the trunk is the
3652 sender we won't be sending to it, so no problem. */
3653 if (fSrc)
3654 return true;
3655
3656 /* ASSUMES: That a preemption test detects HM contexts. (Will work on
3657 non-preemptive systems as well.) */
3658 if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
3659 return true;
3660
3661 /* PARANOIA: Grab the spinlock to make sure the trunk structure cannot be
3662 freed while we're touching it. */
3663 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
3664 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
3665
3666 bool fRc = !pTrunk
3667 || pTrunk->fNoPreemptDsts == (INTNETTRUNKDIR_HOST | INTNETTRUNKDIR_WIRE)
3668 || ( (!pNetwork->MacTab.fHostActive || (pTrunk->fNoPreemptDsts & INTNETTRUNKDIR_HOST) )
3669 && (!pNetwork->MacTab.fWireActive || (pTrunk->fNoPreemptDsts & INTNETTRUNKDIR_WIRE) ) );
3670
3671 RTSpinlockRelease(pNetwork->hAddrSpinlock);
3672
3673 return fRc;
3674}
3675
3676
3677/**
3678 * Check context, edit, snoop and switch a broadcast frame when sharing MAC
3679 * address on the wire.
3680 *
3681 * The caller must hold at least one interface on the network busy to prevent it
3682 * from destructing beath us.
3683 *
3684 * @param pNetwork The network the frame is being sent to.
3685 * @param fSrc The source of the packet. (0 (intnet),
3686 * INTNETTRUNKDIR_HOST or INTNETTRUNKDIR_WIRE).
3687 * @param pIfSender The sender interface, NULL if trunk. Used to
3688 * prevent sending an echo to the sender.
3689 * @param pSG Pointer to the gather list.
3690 * @param pEthHdr Pointer to the ethernet header.
3691 * @param pDstTab The destination output table.
3692 */
3693static INTNETSWDECISION intnetR0NetworkSharedMacFixAndSwitchBroadcast(PINTNETNETWORK pNetwork,
3694 uint32_t fSrc, PINTNETIF pIfSender,
3695 PINTNETSG pSG, PRTNETETHERHDR pEthHdr,
3696 PINTNETDSTTAB pDstTab)
3697{
3698 /*
3699 * Before doing any work here, we need to figure out if we can handle it
3700 * in the current context. The restrictions are solely on the trunk.
3701 *
3702 * Note! Since at least one interface is busy, there won't be any changes
3703 * to the parameters here (unless the trunk changes its capability
3704 * report, which it shouldn't).
3705 */
3706 if (!intnetR0NetworkIsContextOkForBroadcast(pNetwork, fSrc))
3707 return INTNETSWDECISION_BAD_CONTEXT;
3708
3709 /*
3710 * Check for ICMPv6 Neighbor Advertisements coming from the trunk.
3711 * If we see an advertisement for an IP in our cache, we can safely remove
3712 * it as the IP has probably moved.
3713 */
3714 if ( (fSrc & INTNETTRUNKDIR_WIRE)
3715 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_IPV6
3716 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3717 intnetR0NetworkSnoopNAFromWire(pNetwork, pSG, pEthHdr);
3718
3719
3720 /*
3721 * Check for ARP packets from the wire since we'll have to make
3722 * modification to them if we're sharing the MAC address with the host.
3723 */
3724 if ( (fSrc & INTNETTRUNKDIR_WIRE)
3725 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_ARP
3726 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3727 intnetR0NetworkEditArpFromWire(pNetwork, pSG, pEthHdr);
3728
3729 /*
3730 * Check for DHCP packets from the internal net since we'll have to set
3731 * broadcast flag in DHCP requests if we're sharing the MAC address with
3732 * the host. GSO is not applicable to DHCP traffic.
3733 */
3734 if ( !fSrc
3735 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_IPV4
3736 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3737 intnetR0NetworkEditDhcpFromIntNet(pNetwork, pSG, pEthHdr);
3738
3739 /*
3740 * Snoop address info from packet originating from the trunk connection.
3741 */
3742 if (fSrc)
3743 {
3744#ifdef INTNET_WITH_DHCP_SNOOPING
3745 uint16_t EtherType = RT_BE2H_U16(pEthHdr->EtherType);
3746 if ( ( EtherType == RTNET_ETHERTYPE_IPV4 /* for DHCP */
3747 && pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN
3748 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID )
3749 || (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4) )
3750 intnetR0TrunkIfSnoopAddr(pNetwork, pSG, EtherType);
3751#else
3752 if (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4)
3753 intnetR0TrunkIfSnoopArp(pNetwork, pSG);
3754#endif
3755 }
3756
3757 /*
3758 * Create the broadcast destination table.
3759 */
3760 return intnetR0NetworkSwitchBroadcast(pNetwork, fSrc, pIfSender, pDstTab);
3761}
3762
3763
/**
 * Check context, snoop and switch a unicast frame using the network layer
 * address instead of the link layer one (when sharing MAC address on the wire).
 *
 * This function is only used for frames coming from the wire (trunk).
 *
 * @returns The switching decision.  For IPv4 and IPv6 frames this is the
 *          result of the level-3 switch; if the destination address cannot be
 *          read from the frame, the result of switching to the trunk only.
 *          ARP frames are handed to the shared-MAC broadcast path and frames
 *          of any other ether type go to the trunk and the promiscuous
 *          interfaces.
 * @param   pNetwork        The network the frame is being sent to.
 * @param   pSG             Pointer to the gather list.
 * @param   pEthHdr         Pointer to the ethernet header.
 * @param   pDstTab         The destination output table.
 */
static INTNETSWDECISION intnetR0NetworkSharedMacFixAndSwitchUnicast(PINTNETNETWORK pNetwork, PINTNETSG pSG,
                                                                    PRTNETETHERHDR pEthHdr, PINTNETDSTTAB pDstTab)
{
    /*
     * Extract the network address from the packet.
     *
     * Every case either fills in Addr, enmAddrType and cbAddr and breaks out
     * of the switch, or returns a decision directly.
     */
    RTNETADDRU      Addr;
    INTNETADDRTYPE  enmAddrType;
    uint8_t         cbAddr;
    switch (RT_BE2H_U16(pEthHdr->EtherType))
    {
        case RTNET_ETHERTYPE_IPV4:
            if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_UOFFSETOF(RTNETIPV4, ip_dst), sizeof(Addr.IPv4), &Addr)))
            {
                Log(("intnetshareduni: failed to read ip_dst! cbTotal=%#x\n", pSG->cbTotal));
                return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
            }
            enmAddrType = kIntNetAddrType_IPv4;
            cbAddr = sizeof(Addr.IPv4);
            Log6(("intnetshareduni: IPv4 %d.%d.%d.%d\n", Addr.au8[0], Addr.au8[1], Addr.au8[2], Addr.au8[3]));
            break;

        case RTNET_ETHERTYPE_IPV6:
            if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_UOFFSETOF(RTNETIPV6, ip6_dst), sizeof(Addr.IPv6), &Addr)))
            {
                Log(("intnetshareduni: failed to read ip6_dst! cbTotal=%#x\n", pSG->cbTotal));
                return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
            }
            enmAddrType = kIntNetAddrType_IPv6;
            cbAddr = sizeof(Addr.IPv6);
            break;
#if 0 /** @todo IntNet: implement IPX for wireless MAC sharing? */
        case RTNET_ETHERTYPE_IPX_1:
        case RTNET_ETHERTYPE_IPX_2:
        case RTNET_ETHERTYPE_IPX_3:
            if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPX, ipx_dstnet), sizeof(Addr.IPX), &Addr)))
            {
                Log(("intnetshareduni: failed to read ipx_dstnet! cbTotal=%#x\n", pSG->cbTotal));
                return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
            }
            enmAddrType = kIntNetAddrType_IPX;
            cbAddr = sizeof(Addr.IPX);
            break;
#endif

        /*
         * Treat ARP as broadcast (it shouldn't end up here normally,
         * so it goes last in the switch).
         */
        case RTNET_ETHERTYPE_ARP:
            Log6(("intnetshareduni: ARP\n"));
            /** @todo revisit this broadcasting of unicast ARP frames! */
            return intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, INTNETTRUNKDIR_WIRE, NULL, pSG, pEthHdr, pDstTab);

        /*
         * Unknown packets are sent to the trunk and any promiscuous interfaces.
         */
        default:
        {
            Log6(("intnetshareduni: unknown ethertype=%#x\n", RT_BE2H_U16(pEthHdr->EtherType)));
            return intnetR0NetworkSwitchTrunkAndPromisc(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
        }
    }

    /*
     * Do level-3 switching.
     */
    INTNETSWDECISION enmSwDecision = intnetR0NetworkSwitchLevel3(pNetwork, &pEthHdr->DstMac,
                                                                 enmAddrType, &Addr, cbAddr,
                                                                 INTNETTRUNKDIR_WIRE, pDstTab);

#ifdef INTNET_WITH_DHCP_SNOOPING
    /*
     * Perform DHCP snooping. GSO is not applicable to DHCP traffic.
     *
     * Only IPv4 frames large enough to hold the minimum IPv4, UDP and DHCP
     * headers are considered.  The snooping does not alter the switching
     * decision made above.
     */
    if (    enmAddrType == kIntNetAddrType_IPv4
        &&  pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN
        &&  pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
        intnetR0TrunkIfSnoopAddr(pNetwork, pSG, RT_BE2H_U16(pEthHdr->EtherType));
#endif /* INTNET_WITH_DHCP_SNOOPING */

    return enmSwDecision;
}
3859
3860
3861/**
3862 * Release all the interfaces in the destination table when we realize that
3863 * we're in a context where we cannot get the job done.
3864 *
3865 * @param pNetwork The network.
3866 * @param pDstTab The destination table.
3867 */
3868static void intnetR0NetworkReleaseDstTab(PINTNETNETWORK pNetwork, PINTNETDSTTAB pDstTab)
3869{
3870 /* The trunk interface. */
3871 if (pDstTab->fTrunkDst)
3872 {
3873 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3874 if (pTrunk)
3875 intnetR0BusyDec(pNetwork, &pTrunk->cBusy);
3876 pDstTab->pTrunk = NULL;
3877 pDstTab->fTrunkDst = 0;
3878 }
3879
3880 /* Regular interfaces. */
3881 uint32_t iIf = pDstTab->cIfs;
3882 while (iIf-- > 0)
3883 {
3884 PINTNETIF pIf = pDstTab->aIfs[iIf].pIf;
3885 intnetR0BusyDecIf(pIf);
3886 pDstTab->aIfs[iIf].pIf = NULL;
3887 }
3888 pDstTab->cIfs = 0;
3889}
3890
3891
3892/**
3893 * Deliver the frame to the interfaces specified in the destination table.
3894 *
3895 * @param pNetwork The network.
3896 * @param pDstTab The destination table.
3897 * @param pSG The frame to send.
3898 * @param pIfSender The sender interface. NULL if it originated via
3899 * the trunk.
3900 */
3901static void intnetR0NetworkDeliver(PINTNETNETWORK pNetwork, PINTNETDSTTAB pDstTab, PINTNETSG pSG, PINTNETIF pIfSender)
3902{
3903 /*
3904 * Do the interfaces first before sending it to the wire and risk having to
3905 * modify it.
3906 */
3907 uint32_t iIf = pDstTab->cIfs;
3908 while (iIf-- > 0)
3909 {
3910 PINTNETIF pIf = pDstTab->aIfs[iIf].pIf;
3911 intnetR0IfSend(pIf, pIfSender, pSG,
3912 pDstTab->aIfs[iIf].fReplaceDstMac ? &pIf->MacAddr: NULL);
3913 intnetR0BusyDecIf(pIf);
3914 pDstTab->aIfs[iIf].pIf = NULL;
3915 }
3916 pDstTab->cIfs = 0;
3917
3918 /*
3919 * Send to the trunk.
3920 *
3921 * Note! The switching functions will include the trunk even when the frame
3922 * source is the trunk. This is because we need it to figure out
3923 * whether the other half of the trunk should see the frame or not
3924 * and let the caller know.
3925 *
3926 * So, we'll ignore trunk sends here if the frame origin is
3927 * INTNETTRUNKSWPORT::pfnRecv.
3928 */
3929 if (pDstTab->fTrunkDst)
3930 {
3931 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3932 if (pTrunk)
3933 {
3934 if (pIfSender)
3935 intnetR0TrunkIfSend(pTrunk, pNetwork, pIfSender, pDstTab->fTrunkDst, pSG);
3936 intnetR0BusyDec(pNetwork, &pTrunk->cBusy);
3937 }
3938 pDstTab->pTrunk = NULL;
3939 pDstTab->fTrunkDst = 0;
3940 }
3941}
3942
3943
/**
 * Sends a frame.
 *
 * This function will distribute the frame to the interfaces it is addressed to.
 * It will also update the MAC address of the sender (only while the sender has
 * not had its MAC address set explicitly, see fMacSet).
 *
 * The caller must own the network mutex.
 * NOTE(review): IntNetR0IfSend calls this while holding only a busy reference
 * to the interface and the interface's destination table, so the statement
 * above looks stale - confirm.
 *
 * @returns The switching decision.  INTNETSWDECISION_INVALID if the frame is
 *          shorter than an ethernet header or the header cannot be read,
 *          INTNETSWDECISION_BAD_CONTEXT if the frame cannot be switched or
 *          delivered in the current context (nothing is delivered and the
 *          destination table references are released in the latter case).
 * @param   pNetwork        The network the frame is being sent to.
 * @param   pIfSender       The interface sending the frame. This is NULL if it's the trunk.
 * @param   fSrc            The source flags. This 0 if it's not from the trunk.
 * @param   pSG             Pointer to the gather list.
 * @param   pDstTab         The destination table to use.
 */
static INTNETSWDECISION intnetR0NetworkSend(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, uint32_t fSrc,
                                            PINTNETSG pSG, PINTNETDSTTAB pDstTab)
{
    /*
     * Assert reality.
     */
    AssertPtr(pNetwork);
    AssertPtrNull(pIfSender);
    Assert(pIfSender ? fSrc == 0 : fSrc != 0);
    Assert(!pIfSender || pNetwork == pIfSender->pNetwork);
    AssertPtr(pSG);
    Assert(pSG->cSegsUsed >= 1);
    Assert(pSG->cSegsUsed <= pSG->cSegsAlloc);
    if (pSG->cbTotal < sizeof(RTNETETHERHDR))
        return INTNETSWDECISION_INVALID;

    /*
     * Get the ethernet header (might theoretically involve multiple segments).
     * The header is copied to the stack, either straight from the first
     * segment or gathered across segments.
     */
    RTNETETHERHDR EthHdr;
    if (pSG->aSegs[0].cb >= sizeof(EthHdr))
        EthHdr = *(PCRTNETETHERHDR)pSG->aSegs[0].pv;
    else if (!intnetR0SgReadPart(pSG, 0, sizeof(EthHdr), &EthHdr))
        return INTNETSWDECISION_INVALID;
    /* Logging only: restrict the level 2 log output to frames with selected
       MAC prefixes or a leading 0xff byte.
       NOTE(review): 08:00:27 and 00:16:cb look like vendor OUIs picked to cut
       down on log noise - confirm. */
    if (   (EthHdr.DstMac.au8[0] == 0x08 && EthHdr.DstMac.au8[1] == 0x00 && EthHdr.DstMac.au8[2] == 0x27)
        || (EthHdr.SrcMac.au8[0] == 0x08 && EthHdr.SrcMac.au8[1] == 0x00 && EthHdr.SrcMac.au8[2] == 0x27)
        || (EthHdr.DstMac.au8[0] == 0x00 && EthHdr.DstMac.au8[1] == 0x16 && EthHdr.DstMac.au8[2] == 0xcb)
        || (EthHdr.SrcMac.au8[0] == 0x00 && EthHdr.SrcMac.au8[1] == 0x16 && EthHdr.SrcMac.au8[2] == 0xcb)
        || EthHdr.DstMac.au8[0] == 0xff
        || EthHdr.SrcMac.au8[0] == 0xff)
        Log2(("D=%.6Rhxs S=%.6Rhxs T=%04x f=%x z=%x\n",
              &EthHdr.DstMac, &EthHdr.SrcMac, RT_BE2H_U16(EthHdr.EtherType), fSrc, pSG->cbTotal));

    /*
     * Learn the MAC address of the sender.  No re-learning as the interface
     * user will normally tell us the right MAC address.
     *
     * Note! We don't notify the trunk about these mainly because of the
     *       problematic contexts we might be called in.
     */
    if (RT_UNLIKELY(    pIfSender
                    &&  !pIfSender->fMacSet
                    &&  memcmp(&EthHdr.SrcMac, &pIfSender->MacAddr, sizeof(pIfSender->MacAddr))
                    &&  !intnetR0IsMacAddrMulticast(&EthHdr.SrcMac)
                    ))
    {
        Log2(("IF MAC: %.6Rhxs -> %.6Rhxs\n", &pIfSender->MacAddr, &EthHdr.SrcMac));
        RTSpinlockAcquire(pNetwork->hAddrSpinlock);

        /* Update both copies: the MAC table entry (if found) and the interface. */
        PINTNETMACTABENTRY pIfEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIfSender);
        if (pIfEntry)
            pIfEntry->MacAddr = EthHdr.SrcMac;
        pIfSender->MacAddr    = EthHdr.SrcMac;

        RTSpinlockRelease(pNetwork->hAddrSpinlock);
    }

    /*
     * Deal with MAC address sharing as that may require editing of the
     * packets before we dispatch them anywhere.
     */
    INTNETSWDECISION enmSwDecision;
    if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
    {
        if (intnetR0IsMacAddrMulticast(&EthHdr.DstMac))
            enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, fSrc, pIfSender, pSG, &EthHdr, pDstTab);
        else if (fSrc & INTNETTRUNKDIR_WIRE)
        {
            /* Unicast from the wire: it may still have to be treated as a broadcast. */
            if (intnetR0NetworkSharedMacDetectAndFixBroadcast(pNetwork, pSG, &EthHdr))
                enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, fSrc, pIfSender, pSG, &EthHdr, pDstTab);
            else
                enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchUnicast(pNetwork, pSG, &EthHdr, pDstTab);
        }
        else
            enmSwDecision = intnetR0NetworkSwitchUnicast(pNetwork, fSrc, pIfSender, &EthHdr.DstMac, pDstTab);
    }
    else if (intnetR0IsMacAddrMulticast(&EthHdr.DstMac))
        enmSwDecision = intnetR0NetworkSwitchBroadcast(pNetwork, fSrc, pIfSender, pDstTab);
    else
        enmSwDecision = intnetR0NetworkSwitchUnicast(pNetwork, fSrc, pIfSender, &EthHdr.DstMac, pDstTab);

    /*
     * Deliver to the destinations if we can.  If the context turns out to be
     * unsuitable for the chosen destinations, release the references in the
     * destination table and report a bad context instead.
     */
    if (enmSwDecision != INTNETSWDECISION_BAD_CONTEXT)
    {
        if (intnetR0NetworkIsContextOk(pNetwork, pIfSender, pDstTab))
            intnetR0NetworkDeliver(pNetwork, pDstTab, pSG, pIfSender);
        else
        {
            intnetR0NetworkReleaseDstTab(pNetwork, pDstTab);
            enmSwDecision = INTNETSWDECISION_BAD_CONTEXT;
        }
    }

    return enmSwDecision;
}
4056
4057
/**
 * Sends one or more frames.
 *
 * The function processes the frames the caller has queued in the interface's
 * send ring buffer, one at a time, until the ring is empty or a frame cannot
 * be handled in the current context.  Regular and GSO frames are switched
 * onto the network; padding entries are skipped and any other entry type is
 * counted as a bad frame and skipped.
 *
 * The caller is responsible for making sure that there are no concurrent calls
 * to this method (with the same handle).
 *
 * @returns VBox status code.
 * @retval  VERR_INVALID_HANDLE if the handle lookup fails.
 * @retval  VERR_TRY_AGAIN if a frame got the bad-context switching decision;
 *          that frame is left in the send ring.
 * @retval  VERR_INTERNAL_ERROR_3 if the interface has no network.
 * @retval  VERR_INTERNAL_ERROR_4 if the interface's destination table is
 *          not available.
 * @param   hIf         The interface handle.
 * @param   pSession    The caller's session.
 */
INTNETR0DECL(int) IntNetR0IfSend(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession)
{
    Log5(("IntNetR0IfSend: hIf=%RX32\n", hIf));

    /*
     * Validate input and translate the handle.
     */
    PINTNET pIntNet = g_pIntNet;
    AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
    AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);

    PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
    if (!pIf)
        return VERR_INVALID_HANDLE;
    STAM_REL_PROFILE_START(&pIf->pIntBuf->StatSend1, a);

    /*
     * Make sure we've got a network.
     */
    int             rc      = VINF_SUCCESS;
    intnetR0BusyIncIf(pIf);
    PINTNETNETWORK  pNetwork = pIf->pNetwork;
    if (RT_LIKELY(pNetwork))
    {
        /*
         * Grab the destination table.  It is swapped out of the interface
         * atomically and put back once the send buffer has been processed.
         */
        PINTNETDSTTAB pDstTab = ASMAtomicXchgPtrT(&pIf->pDstTab, NULL, PINTNETDSTTAB);
        if (RT_LIKELY(pDstTab))
        {
            /*
             * Process the send buffer.
             */
            INTNETSWDECISION    enmSwDecision = INTNETSWDECISION_BROADCAST;
            INTNETSG            Sg;     /** @todo this will have to be changed if we're going to use async sending
                                         * with buffer sharing for some OS or service. Darwin copies everything so
                                         * I won't bother allocating and managing SGs right now. Sorry. */
            PINTNETHDR          pHdr;
            while ((pHdr = IntNetRingGetNextFrameToRead(&pIf->pIntBuf->Send)) != NULL)
            {
                uint8_t const u8Type = pHdr->u8Type;
                if (u8Type == INTNETHDR_TYPE_FRAME)
                {
                    /* Send regular frame. */
                    void *pvCurFrame = IntNetHdrGetFramePtr(pHdr, pIf->pIntBuf);
                    IntNetSgInitTemp(&Sg, pvCurFrame, pHdr->cbFrame);
                    if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
                        intnetR0IfSnoopAddr(pIf, (uint8_t *)pvCurFrame, pHdr->cbFrame, false /*fGso*/, (uint16_t *)&Sg.fFlags);
                    enmSwDecision = intnetR0NetworkSend(pNetwork, pIf, 0 /*fSrc*/, &Sg, pDstTab);
                }
                else if (u8Type == INTNETHDR_TYPE_GSO)
                {
                    /* Send GSO frame if sane.  The GSO context sits in front of
                       the frame data, so the frame size excludes it. */
                    PPDMNETWORKGSO  pGso    = IntNetHdrGetGsoContext(pHdr, pIf->pIntBuf);
                    uint32_t        cbFrame = pHdr->cbFrame - sizeof(*pGso);
                    if (RT_LIKELY(PDMNetGsoIsValid(pGso, pHdr->cbFrame, cbFrame)))
                    {
                        void *pvCurFrame = pGso + 1;
                        IntNetSgInitTempGso(&Sg, pvCurFrame, cbFrame, pGso);
                        if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
                            intnetR0IfSnoopAddr(pIf, (uint8_t *)pvCurFrame, cbFrame, true /*fGso*/, (uint16_t *)&Sg.fFlags);
                        enmSwDecision = intnetR0NetworkSend(pNetwork, pIf, 0 /*fSrc*/, &Sg, pDstTab);
                    }
                    else
                    {
                        STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatBadFrames); /* ignore */
                        enmSwDecision = INTNETSWDECISION_DROP;
                    }
                }
                /* Unless it's a padding frame, we're getting babble from the producer. */
                else
                {
                    if (u8Type != INTNETHDR_TYPE_PADDING)
                        STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatBadFrames); /* ignore */
                    enmSwDecision = INTNETSWDECISION_DROP;
                }
                /* A bad context stops the processing; the frame is not skipped. */
                if (enmSwDecision == INTNETSWDECISION_BAD_CONTEXT)
                {
                    rc = VERR_TRY_AGAIN;
                    break;
                }

                /* Skip to the next frame. */
                IntNetRingSkipFrame(&pIf->pIntBuf->Send);
            }

            /*
             * Put back the destination table.
             */
            Assert(!pIf->pDstTab);
            ASMAtomicWritePtr(&pIf->pDstTab, pDstTab);
        }
        else
            rc = VERR_INTERNAL_ERROR_4;
    }
    else
        rc = VERR_INTERNAL_ERROR_3;

    /*
     * Release the interface.
     */
    intnetR0BusyDecIf(pIf);
    STAM_REL_PROFILE_STOP(&pIf->pIntBuf->StatSend1, a);
    intnetR0IfRelease(pIf, pSession);
    return rc;
}
4179
4180
4181/**
4182 * VMMR0 request wrapper for IntNetR0IfSend.
4183 *
4184 * @returns see IntNetR0IfSend.
4185 * @param pSession The caller's session.
4186 * @param pReq The request packet.
4187 */
4188INTNETR0DECL(int) IntNetR0IfSendReq(PSUPDRVSESSION pSession, PINTNETIFSENDREQ pReq)
4189{
4190 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4191 return VERR_INVALID_PARAMETER;
4192 return IntNetR0IfSend(pReq->hIf, pSession);
4193}
4194
4195
4196/**
4197 * Maps the default buffer into ring 3.
4198 *
4199 * @returns VBox status code.
4200 * @param hIf The interface handle.
4201 * @param pSession The caller's session.
4202 * @param ppRing3Buf Where to store the address of the ring-3 mapping
4203 * (optional).
4204 * @param ppRing0Buf Where to store the address of the ring-0 mapping
4205 * (optional).
4206 */
4207INTNETR0DECL(int) IntNetR0IfGetBufferPtrs(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession,
4208 R3PTRTYPE(PINTNETBUF) *ppRing3Buf, R0PTRTYPE(PINTNETBUF) *ppRing0Buf)
4209{
4210 LogFlow(("IntNetR0IfGetBufferPtrs: hIf=%RX32 ppRing3Buf=%p ppRing0Buf=%p\n", hIf, ppRing3Buf, ppRing0Buf));
4211
4212 /*
4213 * Validate input.
4214 */
4215 PINTNET pIntNet = g_pIntNet;
4216 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4217 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4218
4219 AssertPtrNullReturn(ppRing3Buf, VERR_INVALID_PARAMETER);
4220 AssertPtrNullReturn(ppRing0Buf, VERR_INVALID_PARAMETER);
4221 if (ppRing3Buf)
4222 *ppRing3Buf = 0;
4223 if (ppRing0Buf)
4224 *ppRing0Buf = 0;
4225
4226 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4227 if (!pIf)
4228 return VERR_INVALID_HANDLE;
4229
4230 /*
4231 * ASSUMES that only the process that created an interface can use it.
4232 * ASSUMES that we created the ring-3 mapping when selecting or
4233 * allocating the buffer.
4234 */
4235 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
4236 if (RT_SUCCESS(rc))
4237 {
4238 if (ppRing3Buf)
4239 *ppRing3Buf = pIf->pIntBufR3;
4240 if (ppRing0Buf)
4241 *ppRing0Buf = (R0PTRTYPE(PINTNETBUF))pIf->pIntBuf; /* tstIntNetR0 mess */
4242
4243 rc = RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
4244 }
4245
4246 intnetR0IfRelease(pIf, pSession);
4247 LogFlow(("IntNetR0IfGetBufferPtrs: returns %Rrc *ppRing3Buf=%p *ppRing0Buf=%p\n",
4248 rc, ppRing3Buf ? *ppRing3Buf : NIL_RTR3PTR, ppRing0Buf ? *ppRing0Buf : NIL_RTR0PTR));
4249 return rc;
4250}
4251
4252
4253/**
4254 * VMMR0 request wrapper for IntNetR0IfGetBufferPtrs.
4255 *
4256 * @returns see IntNetR0IfGetRing3Buffer.
4257 * @param pSession The caller's session.
4258 * @param pReq The request packet.
4259 */
4260INTNETR0DECL(int) IntNetR0IfGetBufferPtrsReq(PSUPDRVSESSION pSession, PINTNETIFGETBUFFERPTRSREQ pReq)
4261{
4262 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4263 return VERR_INVALID_PARAMETER;
4264 return IntNetR0IfGetBufferPtrs(pReq->hIf, pSession, &pReq->pRing3Buf, &pReq->pRing0Buf);
4265}
4266
4267
#if 0
/**
 * Gets the physical addresses of the default interface buffer.
 *
 * Disabled code: the body only validates the input and takes/releases the
 * network mutex, and always returns VERR_NOT_IMPLEMENTED once the handle has
 * been translated.
 *
 * NOTE(review): the body uses pSession, which is not a parameter of this
 * function, so this will not compile as-is if re-enabled.
 *
 * @returns VBox status code.
 * @param   hIf         The interface handle.
 * @param   paPages     Where to store the addresses. (The reserved fields will be set to zero.)
 * @param   cPages      Number of entries in the paPages array.
 */
INTNETR0DECL(int) IntNetR0IfGetPhysBuffer(INTNETIFHANDLE hIf, PSUPPAGE paPages, unsigned cPages)
{
    /*
     * Validate input.
     */
    PINTNET pIntNet = g_pIntNet;
    AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
    AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);

    AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
    /* Check the last byte of the array too. */
    AssertPtrReturn((uint8_t *)&paPages[cPages] - 1, VERR_INVALID_PARAMETER);
    PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
    if (!pIf)
        return VERR_INVALID_HANDLE;

    /*
     * Grab the lock and get the data.
     * ASSUMES that the handle isn't closed while we're here.
     */
    int rc = RTSemFastMutexRequest(pIf->pNetwork->FastMutex);
    if (RT_SUCCESS(rc))
    {
        /** @todo make a SUPR0 api for obtaining the array. SUPR0/IPRT is keeping track of everything, there
         * is no need for any extra bookkeeping here.. */

        rc = RTSemFastMutexRelease(pIf->pNetwork->FastMutex);
    }
    intnetR0IfRelease(pIf, pSession);
    return VERR_NOT_IMPLEMENTED;
}
#endif
4308
4309
4310/**
4311 * Sets the promiscuous mode property of an interface.
4312 *
4313 * @returns VBox status code.
4314 * @param hIf The interface handle.
4315 * @param pSession The caller's session.
4316 * @param fPromiscuous Set if the interface should be in promiscuous mode, clear if not.
4317 */
4318INTNETR0DECL(int) IntNetR0IfSetPromiscuousMode(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fPromiscuous)
4319{
4320 LogFlow(("IntNetR0IfSetPromiscuousMode: hIf=%RX32 fPromiscuous=%d\n", hIf, fPromiscuous));
4321
4322 /*
4323 * Validate & translate input.
4324 */
4325 PINTNET pIntNet = g_pIntNet;
4326 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4327 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4328
4329 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4330 if (!pIf)
4331 {
4332 Log(("IntNetR0IfSetPromiscuousMode: returns VERR_INVALID_HANDLE\n"));
4333 return VERR_INVALID_HANDLE;
4334 }
4335
4336 /*
4337 * Get the network, take the address spinlock, and make the change.
4338 * Paranoia^2: Mark ourselves busy to prevent anything from being destroyed.
4339 */
4340 int rc = VINF_SUCCESS;
4341 intnetR0BusyIncIf(pIf);
4342 PINTNETNETWORK pNetwork = pIf->pNetwork;
4343 if (pNetwork)
4344 {
4345 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4346
4347 if (pIf->fPromiscuousReal != fPromiscuous)
4348 {
4349 const bool fPromiscuousEff = fPromiscuous
4350 && (pIf->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW)
4351 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS);
4352 Log(("IntNetR0IfSetPromiscuousMode: hIf=%RX32: Changed from %d -> %d (%d)\n",
4353 hIf, !fPromiscuous, !!fPromiscuous, fPromiscuousEff));
4354
4355 pIf->fPromiscuousReal = fPromiscuous;
4356
4357 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
4358 if (RT_LIKELY(pEntry))
4359 {
4360 if (pEntry->fPromiscuousEff)
4361 {
4362 pNetwork->MacTab.cPromiscuousEntries--;
4363 if (!pEntry->fPromiscuousSeeTrunk)
4364 pNetwork->MacTab.cPromiscuousNoTrunkEntries--;
4365 Assert(pNetwork->MacTab.cPromiscuousEntries < pNetwork->MacTab.cEntries);
4366 Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries < pNetwork->MacTab.cEntries);
4367 }
4368
4369 pEntry->fPromiscuousEff = fPromiscuousEff;
4370 pEntry->fPromiscuousSeeTrunk = fPromiscuousEff
4371 && (pIf->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK);
4372
4373 if (pEntry->fPromiscuousEff)
4374 {
4375 pNetwork->MacTab.cPromiscuousEntries++;
4376 if (!pEntry->fPromiscuousSeeTrunk)
4377 pNetwork->MacTab.cPromiscuousNoTrunkEntries++;
4378 }
4379 Assert(pNetwork->MacTab.cPromiscuousEntries <= pNetwork->MacTab.cEntries);
4380 Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries <= pNetwork->MacTab.cEntries);
4381 }
4382 }
4383
4384 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4385 }
4386 else
4387 rc = VERR_WRONG_ORDER;
4388
4389 intnetR0BusyDecIf(pIf);
4390 intnetR0IfRelease(pIf, pSession);
4391 return rc;
4392}
4393
4394
4395/**
4396 * VMMR0 request wrapper for IntNetR0IfSetPromiscuousMode.
4397 *
4398 * @returns see IntNetR0IfSetPromiscuousMode.
4399 * @param pSession The caller's session.
4400 * @param pReq The request packet.
4401 */
4402INTNETR0DECL(int) IntNetR0IfSetPromiscuousModeReq(PSUPDRVSESSION pSession, PINTNETIFSETPROMISCUOUSMODEREQ pReq)
4403{
4404 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4405 return VERR_INVALID_PARAMETER;
4406 return IntNetR0IfSetPromiscuousMode(pReq->hIf, pSession, pReq->fPromiscuous);
4407}
4408
4409
4410/**
4411 * Sets the MAC address of an interface.
4412 *
4413 * @returns VBox status code.
4414 * @param hIf The interface handle.
4415 * @param pSession The caller's session.
4416 * @param pMAC The new MAC address.
4417 */
4418INTNETR0DECL(int) IntNetR0IfSetMacAddress(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, PCRTMAC pMac)
4419{
4420 LogFlow(("IntNetR0IfSetMacAddress: hIf=%RX32 pMac=%p:{%.6Rhxs}\n", hIf, pMac, pMac));
4421
4422 /*
4423 * Validate & translate input.
4424 */
4425 PINTNET pIntNet = g_pIntNet;
4426 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4427 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4428
4429 AssertPtrReturn(pMac, VERR_INVALID_PARAMETER);
4430 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4431 if (!pIf)
4432 {
4433 Log(("IntNetR0IfSetMacAddress: returns VERR_INVALID_HANDLE\n"));
4434 return VERR_INVALID_HANDLE;
4435 }
4436
4437 /*
4438 * Get the network, take the address spinlock, and make the change.
4439 * Paranoia^2: Mark ourselves busy to prevent anything from being destroyed.
4440 */
4441 int rc = VINF_SUCCESS;
4442 intnetR0BusyIncIf(pIf);
4443 PINTNETNETWORK pNetwork = pIf->pNetwork;
4444 if (pNetwork)
4445 {
4446 PINTNETTRUNKIF pTrunk = NULL;
4447
4448 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4449
4450 if (memcmp(&pIf->MacAddr, pMac, sizeof(pIf->MacAddr)))
4451 {
4452 Log(("IntNetR0IfSetMacAddress: hIf=%RX32: Changed from %.6Rhxs -> %.6Rhxs\n",
4453 hIf, &pIf->MacAddr, pMac));
4454
4455 /* Update the two copies. */
4456 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
4457 if (RT_LIKELY(pEntry))
4458 pEntry->MacAddr = *pMac;
4459 pIf->MacAddr = *pMac;
4460 pIf->fMacSet = true;
4461
4462 /* Grab a busy reference to the trunk so we release the lock before notifying it. */
4463 pTrunk = pNetwork->MacTab.pTrunk;
4464 if (pTrunk)
4465 intnetR0BusyIncTrunk(pTrunk);
4466 }
4467
4468 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4469
4470 if (pTrunk)
4471 {
4472 Log(("IntNetR0IfSetMacAddress: pfnNotifyMacAddress hIf=%RX32\n", hIf));
4473 PINTNETTRUNKIFPORT pIfPort = pTrunk->pIfPort;
4474 if (pIfPort)
4475 pIfPort->pfnNotifyMacAddress(pIfPort, pIf->pvIfData, pMac);
4476 intnetR0BusyDecTrunk(pTrunk);
4477 }
4478 }
4479 else
4480 rc = VERR_WRONG_ORDER;
4481
4482 intnetR0BusyDecIf(pIf);
4483 intnetR0IfRelease(pIf, pSession);
4484 return rc;
4485}
4486
4487
4488/**
4489 * VMMR0 request wrapper for IntNetR0IfSetMacAddress.
4490 *
4491 * @returns see IntNetR0IfSetMacAddress.
4492 * @param pSession The caller's session.
4493 * @param pReq The request packet.
4494 */
4495INTNETR0DECL(int) IntNetR0IfSetMacAddressReq(PSUPDRVSESSION pSession, PINTNETIFSETMACADDRESSREQ pReq)
4496{
4497 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4498 return VERR_INVALID_PARAMETER;
4499 return IntNetR0IfSetMacAddress(pReq->hIf, pSession, &pReq->Mac);
4500}
4501
4502
/**
 * Worker for intnetR0IfSetActive and intnetR0IfDestruct.
 *
 * This function will update the active interface count on the network and
 * activate or deactivate the trunk connection if necessary.
 *
 * The caller must own the giant lock (we cannot take it here).
 *
 * @returns VBox status code (always VINF_SUCCESS).
 * @param   pNetwork        The network.
 * @param   pIf             The interface.
 * @param   fActive         What to do.
 */
static int intnetR0NetworkSetIfActive(PINTNETNETWORK pNetwork, PINTNETIF pIf, bool fActive)
{
    /* quick sanity check */
    AssertPtr(pNetwork);
    AssertPtr(pIf);

    /*
     * The address spinlock of the network protects the variables, while the
     * big lock (owned by the caller) protects the calling of pfnSetState.
     * Only the spinlock is taken here.
     */
    PINTNETTRUNKIF pTrunk = NULL;
    RTSpinlockAcquire(pNetwork->hAddrSpinlock);

    /*
     * Do the update.
     *
     * pTrunk is only set when the active interface count goes from zero to
     * one or from one to zero, i.e. when the trunk state has to change.
     */
    if (pIf->fActive != fActive)
    {
        PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
        if (RT_LIKELY(pEntry))
        {
            pEntry->fActive = fActive;
            pIf->fActive    = fActive;

            if (fActive)
            {
                pNetwork->cActiveIFs++;
                if (pNetwork->cActiveIFs == 1)
                {
                    /* First active interface: enable the trunk directions the network flags allow. */
                    pTrunk = pNetwork->MacTab.pTrunk;
                    if (pTrunk)
                    {
                        pNetwork->MacTab.fHostActive = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
                        pNetwork->MacTab.fWireActive = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED);
                    }
                }
            }
            else
            {
                pNetwork->cActiveIFs--;
                if (pNetwork->cActiveIFs == 0)
                {
                    /* Last active interface gone: disable both trunk directions. */
                    pTrunk = pNetwork->MacTab.pTrunk;
                    pNetwork->MacTab.fHostActive = false;
                    pNetwork->MacTab.fWireActive = false;
                }
            }
        }
    }

    RTSpinlockRelease(pNetwork->hAddrSpinlock);

    /*
     * Tell the trunk if necessary.
     * The wait for !busy is for the Solaris streams trunk driver (mostly).
     */
    if (pTrunk && pTrunk->pIfPort)
    {
        if (!fActive)
            intnetR0BusyWait(pNetwork, &pTrunk->cBusy);

        pTrunk->pIfPort->pfnSetState(pTrunk->pIfPort, fActive ? INTNETTRUNKIFSTATE_ACTIVE : INTNETTRUNKIFSTATE_INACTIVE);
    }

    return VINF_SUCCESS;
}
4583
4584
4585/**
4586 * Sets the active property of an interface.
4587 *
4588 * @returns VBox status code.
4589 * @param hIf The interface handle.
4590 * @param pSession The caller's session.
4591 * @param fActive The new state.
4592 */
4593INTNETR0DECL(int) IntNetR0IfSetActive(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fActive)
4594{
4595 LogFlow(("IntNetR0IfSetActive: hIf=%RX32 fActive=%RTbool\n", hIf, fActive));
4596
4597 /*
4598 * Validate & translate input.
4599 */
4600 PINTNET pIntNet = g_pIntNet;
4601 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4602 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4603
4604 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4605 if (!pIf)
4606 {
4607 Log(("IntNetR0IfSetActive: returns VERR_INVALID_HANDLE\n"));
4608 return VERR_INVALID_HANDLE;
4609 }
4610
4611 /*
4612 * Hand it to the network since it might involve the trunk and things are
4613 * tricky there wrt to locking order.
4614 *
4615 * 1. We take the giant lock here. This makes sure nobody is re-enabling
4616 * the network while we're pausing it and vice versa. This also enables
4617 * us to wait for the network to become idle before telling the trunk.
4618 * (Important on Solaris.)
4619 *
4620 * 2. For paranoid reasons, we grab a busy reference to the calling
4621 * interface. This is totally unnecessary but should hurt (when done
4622 * after grabbing the giant lock).
4623 */
4624 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
4625 if (RT_SUCCESS(rc))
4626 {
4627 intnetR0BusyIncIf(pIf);
4628
4629 PINTNETNETWORK pNetwork = pIf->pNetwork;
4630 if (pNetwork)
4631 rc = intnetR0NetworkSetIfActive(pNetwork, pIf, fActive);
4632 else
4633 rc = VERR_WRONG_ORDER;
4634
4635 intnetR0BusyDecIf(pIf);
4636 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
4637 }
4638
4639 intnetR0IfRelease(pIf, pSession);
4640 LogFlow(("IntNetR0IfSetActive: returns %Rrc\n", rc));
4641 return rc;
4642}
4643
4644
4645/**
4646 * VMMR0 request wrapper for IntNetR0IfSetActive.
4647 *
4648 * @returns see IntNetR0IfSetActive.
4649 * @param pIntNet The internal networking instance.
4650 * @param pSession The caller's session.
4651 * @param pReq The request packet.
4652 */
4653INTNETR0DECL(int) IntNetR0IfSetActiveReq(PSUPDRVSESSION pSession, PINTNETIFSETACTIVEREQ pReq)
4654{
4655 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4656 return VERR_INVALID_PARAMETER;
4657 return IntNetR0IfSetActive(pReq->hIf, pSession, pReq->fActive);
4658}
4659
4660
4661/**
4662 * Wait for the interface to get signaled.
4663 * The interface will be signaled when is put into the receive buffer.
4664 *
4665 * @returns VBox status code.
4666 * @param hIf The interface handle.
4667 * @param pSession The caller's session.
4668 * @param cMillies Number of milliseconds to wait. RT_INDEFINITE_WAIT should be
4669 * used if indefinite wait is desired.
4670 */
4671INTNETR0DECL(int) IntNetR0IfWait(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, uint32_t cMillies)
4672{
4673 Log4(("IntNetR0IfWait: hIf=%RX32 cMillies=%u\n", hIf, cMillies));
4674
4675 /*
4676 * Get and validate essential handles.
4677 */
4678 PINTNET pIntNet = g_pIntNet;
4679 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4680 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4681
4682 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4683 if (!pIf)
4684 {
4685 Log(("IntNetR0IfWait: returns VERR_INVALID_HANDLE\n"));
4686 return VERR_INVALID_HANDLE;
4687 }
4688
4689#if defined(VBOX_WITH_INTNET_SERVICE_IN_R3) && defined(IN_RING3)
4690 AssertReleaseFailed(); /* Should never be called. */
4691 RT_NOREF(cMillies);
4692 return VERR_NOT_SUPPORTED;
4693#else
4694 const RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4695 const bool fNoMoreWaits = ASMAtomicUoReadBool(&pIf->fNoMoreWaits);
4696 RTNATIVETHREAD hDtorThrd;
4697 ASMAtomicReadHandle(&pIf->hDestructorThread, &hDtorThrd);
4698 if (hDtorThrd != NIL_RTNATIVETHREAD)
4699 {
4700 /* See IntNetR0IfAbortWait for an explanation of hDestructorThread. */
4701 Log(("IntNetR0IfWait: returns VERR_SEM_DESTROYED\n"));
4702 return VERR_SEM_DESTROYED;
4703 }
4704
4705 /* Check whether further waits have been barred by IntNetR0IfAbortWait. */
4706 int rc;
4707 if ( !fNoMoreWaits
4708 && hRecvEvent != NIL_RTSEMEVENT)
4709 {
4710 /*
4711 * It is tempting to check if there is data to be read here,
4712 * but the problem with such an approach is that it will cause
4713 * one unnecessary supervisor->user->supervisor trip. There is
4714 * already a slight risk for such, so no need to increase it.
4715 */
4716
4717 /*
4718 * Increment the number of waiters before starting the wait.
4719 * Upon wakeup we must assert reality, checking that we're not
4720 * already destroyed or in the process of being destroyed. This
4721 * code must be aligned with the waiting code in intnetR0IfDestruct.
4722 */
4723 ASMAtomicIncU32(&pIf->cSleepers);
4724 rc = RTSemEventWaitNoResume(hRecvEvent, cMillies);
4725 if (pIf->hRecvEvent == hRecvEvent)
4726 {
4727 ASMAtomicDecU32(&pIf->cSleepers);
4728 ASMAtomicReadHandle(&pIf->hDestructorThread, &hDtorThrd);
4729 if (hDtorThrd == NIL_RTNATIVETHREAD)
4730 {
4731 if (intnetR0IfRelease(pIf, pSession))
4732 rc = VERR_SEM_DESTROYED;
4733 }
4734 else
4735 rc = VERR_SEM_DESTROYED;
4736 }
4737 else
4738 rc = VERR_SEM_DESTROYED;
4739 }
4740 else
4741 {
4742 rc = VERR_SEM_DESTROYED;
4743 intnetR0IfRelease(pIf, pSession);
4744 }
4745
4746 Log4(("IntNetR0IfWait: returns %Rrc\n", rc));
4747 return rc;
4748#endif
4749}
4750
4751
4752/**
4753 * VMMR0 request wrapper for IntNetR0IfWait.
4754 *
4755 * @returns see IntNetR0IfWait.
4756 * @param pSession The caller's session.
4757 * @param pReq The request packet.
4758 */
4759INTNETR0DECL(int) IntNetR0IfWaitReq(PSUPDRVSESSION pSession, PINTNETIFWAITREQ pReq)
4760{
4761 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4762 return VERR_INVALID_PARAMETER;
4763 return IntNetR0IfWait(pReq->hIf, pSession, pReq->cMillies);
4764}
4765
4766
4767/**
4768 * Wake up any threads waiting on the interface.
4769 *
4770 * @returns VBox status code.
4771 * @param hIf The interface handle.
4772 * @param pSession The caller's session.
4773 * @param fNoMoreWaits When set, no more waits are permitted.
4774 */
4775INTNETR0DECL(int) IntNetR0IfAbortWait(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fNoMoreWaits)
4776{
4777 Log4(("IntNetR0IfAbortWait: hIf=%RX32 fNoMoreWaits=%RTbool\n", hIf, fNoMoreWaits));
4778
4779 /*
4780 * Get and validate essential handles.
4781 */
4782 PINTNET pIntNet = g_pIntNet;
4783 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4784 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4785
4786 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4787 if (!pIf)
4788 {
4789 Log(("IntNetR0IfAbortWait: returns VERR_INVALID_HANDLE\n"));
4790 return VERR_INVALID_HANDLE;
4791 }
4792
4793#if defined(VBOX_WITH_INTNET_SERVICE_IN_R3) && defined(IN_RING3)
4794 AssertReleaseFailed();
4795 RT_NOREF(fNoMoreWaits);
4796 return VERR_NOT_SUPPORTED;
4797#else
4798 const RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4799 RTNATIVETHREAD hDtorThrd;
4800 ASMAtomicReadHandle(&pIf->hDestructorThread, &hDtorThrd);
4801 if (hDtorThrd != NIL_RTNATIVETHREAD)
4802 {
4803 /* This can only happen if we for some reason race SUPDRVSESSION cleanup,
4804 i.e. the object count is set to zero without yet having removed it from
4805 the object table, so we got a spurious "reference". We must drop that
4806 reference and let the destructor get on with its work. (Not entirely sure
4807 if this is practically possible on any of the platforms, i.e. whether it's
4808 we can actually close a SUPDrv handle/descriptor with active threads still
4809 in NtDeviceIoControlFile/ioctl, but better safe than sorry.) */
4810 Log(("IntNetR0IfAbortWait: returns VERR_SEM_DESTROYED\n"));
4811 return VERR_SEM_DESTROYED;
4812 }
4813
4814 /* a bit of paranoia */
4815 int rc = VINF_SUCCESS;
4816 if (hRecvEvent != NIL_RTSEMEVENT)
4817 {
4818 /*
4819 * Set fNoMoreWaits if requested to do so and then wake up all the sleeping
4820 * threads (usually just one). We leave the semaphore in the signalled
4821 * state so the next caller will return immediately.
4822 */
4823 if (fNoMoreWaits)
4824 ASMAtomicWriteBool(&pIf->fNoMoreWaits, true);
4825
4826 uint32_t cSleepers = ASMAtomicReadU32(&pIf->cSleepers) + 1;
4827 while (cSleepers-- > 0)
4828 {
4829 int rc2 = RTSemEventSignal(pIf->hRecvEvent);
4830 AssertRC(rc2);
4831 }
4832 }
4833 else
4834 rc = VERR_SEM_DESTROYED;
4835
4836 intnetR0IfRelease(pIf, pSession);
4837
4838 Log4(("IntNetR0IfWait: returns %Rrc\n", VINF_SUCCESS));
4839 return VINF_SUCCESS;
4840#endif
4841}
4842
4843
4844/**
4845 * VMMR0 request wrapper for IntNetR0IfAbortWait.
4846 *
4847 * @returns see IntNetR0IfWait.
4848 * @param pSession The caller's session.
4849 * @param pReq The request packet.
4850 */
4851INTNETR0DECL(int) IntNetR0IfAbortWaitReq(PSUPDRVSESSION pSession, PINTNETIFABORTWAITREQ pReq)
4852{
4853 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4854 return VERR_INVALID_PARAMETER;
4855 return IntNetR0IfAbortWait(pReq->hIf, pSession, pReq->fNoMoreWaits);
4856}
4857
4858
4859/**
4860 * Close an interface.
4861 *
4862 * @returns VBox status code.
4863 * @param pIntNet The instance handle.
4864 * @param hIf The interface handle.
4865 * @param pSession The caller's session.
4866 */
4867INTNETR0DECL(int) IntNetR0IfClose(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession)
4868{
4869 LogFlow(("IntNetR0IfClose: hIf=%RX32\n", hIf));
4870
4871 /*
4872 * Validate and free the handle.
4873 */
4874 PINTNET pIntNet = g_pIntNet;
4875 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4876 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4877
4878 PINTNETIF pIf = (PINTNETIF)RTHandleTableFreeWithCtx(pIntNet->hHtIfs, hIf, pSession);
4879 if (!pIf)
4880 return VERR_INVALID_HANDLE;
4881
4882 /* Mark the handle as freed so intnetR0IfDestruct won't free it again. */
4883 ASMAtomicWriteU32(&pIf->hIf, INTNET_HANDLE_INVALID);
4884
4885#if !defined(VBOX_WITH_INTNET_SERVICE_IN_R3) || !defined(IN_RING3)
4886 /*
4887 * Signal the event semaphore to wake up any threads in IntNetR0IfWait
4888 * and give them a moment to get out and release the interface.
4889 */
4890 uint32_t i = pIf->cSleepers;
4891 while (i-- > 0)
4892 {
4893 RTSemEventSignal(pIf->hRecvEvent);
4894 RTThreadYield();
4895 }
4896 RTSemEventSignal(pIf->hRecvEvent);
4897#endif
4898
4899 /*
4900 * Release the references to the interface object (handle + free lookup).
4901 */
4902 void *pvObj = pIf->pvObj;
4903 intnetR0IfRelease(pIf, pSession); /* (RTHandleTableFreeWithCtx) */
4904
4905 int rc = SUPR0ObjRelease(pvObj, pSession);
4906 LogFlow(("IntNetR0IfClose: returns %Rrc\n", rc));
4907 return rc;
4908}
4909
4910
4911/**
4912 * VMMR0 request wrapper for IntNetR0IfCloseReq.
4913 *
4914 * @returns see IntNetR0IfClose.
4915 * @param pSession The caller's session.
4916 * @param pReq The request packet.
4917 */
4918INTNETR0DECL(int) IntNetR0IfCloseReq(PSUPDRVSESSION pSession, PINTNETIFCLOSEREQ pReq)
4919{
4920 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4921 return VERR_INVALID_PARAMETER;
4922 return IntNetR0IfClose(pReq->hIf, pSession);
4923}
4924
4925
4926/**
4927 * Interface destructor callback.
4928 * This is called for reference counted objectes when the count reaches 0.
4929 *
4930 * @param pvObj The object pointer.
4931 * @param pvUser1 Pointer to the interface.
4932 * @param pvUser2 Pointer to the INTNET instance data.
4933 */
4934static DECLCALLBACK(void) intnetR0IfDestruct(void *pvObj, void *pvUser1, void *pvUser2)
4935{
4936 PINTNETIF pIf = (PINTNETIF)pvUser1;
4937 PINTNET pIntNet = (PINTNET)pvUser2;
4938 Log(("intnetR0IfDestruct: pvObj=%p pIf=%p pIntNet=%p hIf=%RX32\n", pvObj, pIf, pIntNet, pIf->hIf));
4939 RT_NOREF1(pvObj);
4940
4941 /*
4942 * For paranoid reasons we must now mark the interface as destroyed.
4943 * This is so that any waiting threads can take evasive action (kind
4944 * of theoretical case), and we can reject everyone else referencing
4945 * the object via the handle table before we get around to removing it.
4946 */
4947 ASMAtomicWriteHandle(&pIf->hDestructorThread, RTThreadNativeSelf());
4948
4949 /*
4950 * We grab the INTNET create/open/destroy semaphore to make sure nobody is
4951 * adding or removing interfaces while we're in here.
4952 */
4953 RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
4954
4955 /*
4956 * Delete the interface handle so the object no longer can be used.
4957 * (Can happen if the client didn't close its session.)
4958 */
4959 INTNETIFHANDLE hIf = ASMAtomicXchgU32(&pIf->hIf, INTNET_HANDLE_INVALID);
4960 if (hIf != INTNET_HANDLE_INVALID)
4961 {
4962 void *pvObj2 = RTHandleTableFreeWithCtx(pIntNet->hHtIfs, hIf, pIf->pSession); NOREF(pvObj2);
4963 AssertMsg(pvObj2 == pIf, ("%p, %p, hIf=%RX32 pSession=%p\n", pvObj2, pIf, hIf, pIf->pSession));
4964 }
4965
4966 /*
4967 * If we've got a network deactivate and detach ourselves from it. Because
4968 * of cleanup order we might have been orphaned by the network destructor.
4969 */
4970 PINTNETNETWORK pNetwork = pIf->pNetwork;
4971 if (pNetwork)
4972 {
4973 /* set inactive. */
4974 intnetR0NetworkSetIfActive(pNetwork, pIf, false /*fActive*/);
4975
4976 /* remove ourselves from the switch table. */
4977 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4978
4979 uint32_t iIf = pNetwork->MacTab.cEntries;
4980 while (iIf-- > 0)
4981 if (pNetwork->MacTab.paEntries[iIf].pIf == pIf)
4982 {
4983 if (pNetwork->MacTab.paEntries[iIf].fPromiscuousEff)
4984 {
4985 pNetwork->MacTab.cPromiscuousEntries--;
4986 if (!pNetwork->MacTab.paEntries[iIf].fPromiscuousSeeTrunk)
4987 pNetwork->MacTab.cPromiscuousNoTrunkEntries--;
4988 }
4989 Assert(pNetwork->MacTab.cPromiscuousEntries < pNetwork->MacTab.cEntries);
4990 Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries < pNetwork->MacTab.cEntries);
4991
4992 if (iIf + 1 < pNetwork->MacTab.cEntries)
4993 memmove(&pNetwork->MacTab.paEntries[iIf],
4994 &pNetwork->MacTab.paEntries[iIf + 1],
4995 (pNetwork->MacTab.cEntries - iIf - 1) * sizeof(pNetwork->MacTab.paEntries[0]));
4996 pNetwork->MacTab.cEntries--;
4997 break;
4998 }
4999
5000 /* recalc the min flags. */
5001 if (pIf->fOpenFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
5002 {
5003 uint32_t fMinFlags = 0;
5004 iIf = pNetwork->MacTab.cEntries;
5005 while (iIf-- > 0)
5006 {
5007 PINTNETIF pIf2 = pNetwork->MacTab.paEntries[iIf].pIf;
5008 if ( pIf2 /* paranoia */
5009 && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES))
5010 fMinFlags |= pIf2->fOpenFlags & INTNET_OPEN_FLAGS_STRICT_MASK;
5011 }
5012 pNetwork->fMinFlags = fMinFlags;
5013 }
5014
5015 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
5016
5017 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5018
5019 /* Notify the trunk about the interface being destroyed. */
5020 if (pTrunk && pTrunk->pIfPort)
5021 pTrunk->pIfPort->pfnDisconnectInterface(pTrunk->pIfPort, pIf->pvIfData);
5022
5023 /* Wait for the interface to quiesce while we still can. */
5024 intnetR0BusyWait(pNetwork, &pIf->cBusy);
5025
5026 /* Release our reference to the network. */
5027 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5028 pIf->pNetwork = NULL;
5029 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5030
5031 SUPR0ObjRelease(pNetwork->pvObj, pIf->pSession);
5032 }
5033
5034 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
5035
5036#if !defined(VBOX_WITH_INTNET_SERVICE_IN_R3) || !defined(IN_RING3)
5037 /*
5038 * Wakeup anyone waiting on this interface. (Kind of unlikely, but perhaps
5039 * not quite impossible.)
5040 *
5041 * We *must* make sure they have woken up properly and realized
5042 * that the interface is no longer valid.
5043 */
5044 if (pIf->hRecvEvent != NIL_RTSEMEVENT)
5045 {
5046 RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
5047 unsigned cMaxWait = 0x1000;
5048 while (pIf->cSleepers && cMaxWait-- > 0)
5049 {
5050 RTSemEventSignal(hRecvEvent);
5051 RTThreadYield();
5052 }
5053 if (pIf->cSleepers)
5054 {
5055 RTThreadSleep(1);
5056
5057 cMaxWait = pIf->cSleepers;
5058 while (pIf->cSleepers && cMaxWait-- > 0)
5059 {
5060 RTSemEventSignal(hRecvEvent);
5061 RTThreadSleep(10);
5062 }
5063 }
5064
5065 RTSemEventDestroy(hRecvEvent);
5066 pIf->hRecvEvent = NIL_RTSEMEVENT;
5067 }
5068#endif
5069
5070 /*
5071 * Unmap user buffer.
5072 */
5073 if (pIf->pIntBuf != pIf->pIntBufDefault)
5074 {
5075 /** @todo user buffer */
5076 }
5077
5078 /*
5079 * Unmap and Free the default buffer.
5080 */
5081 if (pIf->pIntBufDefault)
5082 {
5083 SUPR0MemFree(pIf->pSession, (RTHCUINTPTR)pIf->pIntBufDefault);
5084 pIf->pIntBufDefault = NULL;
5085 pIf->pIntBufDefaultR3 = 0;
5086 pIf->pIntBuf = NULL;
5087 pIf->pIntBufR3 = 0;
5088 }
5089
5090 /*
5091 * Free remaining resources
5092 */
5093 RTSpinlockDestroy(pIf->hRecvInSpinlock);
5094 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
5095
5096 RTMemFree(pIf->pDstTab);
5097 pIf->pDstTab = NULL;
5098
5099 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
5100 intnetR0IfAddrCacheDestroy(&pIf->aAddrCache[i]);
5101
5102 pIf->pvObj = NULL;
5103 RTMemFree(pIf);
5104}
5105
5106
5107/* Forward declaration of trunk reconnection thread function. */
5108static DECLCALLBACK(int) intnetR0TrunkReconnectThread(RTTHREAD hThread, void *pvUser);
5109
5110/**
5111 * Creates a new network interface.
5112 *
5113 * The call must have opened the network for the new interface and is
5114 * responsible for closing it on failure. On success it must leave the network
5115 * opened so the interface destructor can close it.
5116 *
5117 * @returns VBox status code.
5118 * @param pNetwork The network, referenced. The reference is consumed
5119 * on success.
5120 * @param pSession The session handle.
5121 * @param cbSend The size of the send buffer.
5122 * @param cbRecv The size of the receive buffer.
5123 * @param fFlags The open network flags.
5124 * @param pfnRecvAvail The receive available callback to call instead of
5125 * signalling the semaphore (R3 service only).
5126 * @param pvUser The opaque user data to pass to the callback.
5127 * @param phIf Where to store the interface handle.
5128 */
5129static int intnetR0NetworkCreateIf(PINTNETNETWORK pNetwork, PSUPDRVSESSION pSession, unsigned cbSend, unsigned cbRecv,
5130 uint32_t fFlags, PFNINTNETIFRECVAVAIL pfnRecvAvail, void *pvUser, PINTNETIFHANDLE phIf)
5131{
5132 LogFlow(("intnetR0NetworkCreateIf: pNetwork=%p pSession=%p cbSend=%u cbRecv=%u fFlags=%#x phIf=%p\n",
5133 pNetwork, pSession, cbSend, cbRecv, fFlags, phIf));
5134
5135 /*
5136 * Assert input.
5137 */
5138 AssertPtr(pNetwork);
5139 AssertPtr(phIf);
5140#if !defined(VBOX_WITH_INTNET_SERVICE_IN_R3) || !defined(IN_RING3)
5141 Assert(pfnRecvAvail == NULL);
5142 Assert(pvUser == NULL);
5143 RT_NOREF(pfnRecvAvail, pvUser);
5144#endif
5145
5146 /*
5147 * Adjust the flags with defaults for the interface policies.
5148 * Note: Main restricts promiscuous mode per interface.
5149 */
5150 uint32_t const fDefFlags = INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW
5151 | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK;
5152 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkIfFlags); i++)
5153 if (!(fFlags & g_afIntNetOpenNetworkIfFlags[i].fPair))
5154 fFlags |= g_afIntNetOpenNetworkIfFlags[i].fPair & fDefFlags;
5155
5156 /*
5157 * Make sure that all destination tables as well as the have space of
5158 */
5159 int rc = intnetR0NetworkEnsureTabSpace(pNetwork);
5160 if (RT_FAILURE(rc))
5161 return rc;
5162
5163 /*
5164 * Allocate the interface and initialize it.
5165 */
5166 PINTNETIF pIf = (PINTNETIF)RTMemAllocZ(sizeof(*pIf));
5167 if (!pIf)
5168 return VERR_NO_MEMORY;
5169
5170 memset(&pIf->MacAddr, 0xff, sizeof(pIf->MacAddr)); /* broadcast */
5171 //pIf->fMacSet = false;
5172 //pIf->fPromiscuousReal = false;
5173 //pIf->fActive = false;
5174 //pIf->fNoMoreWaits = false;
5175 pIf->fOpenFlags = fFlags;
5176 //pIf->cYields = 0;
5177 //pIf->pIntBuf = 0;
5178 //pIf->pIntBufR3 = NIL_RTR3PTR;
5179 //pIf->pIntBufDefault = 0;
5180 //pIf->pIntBufDefaultR3 = NIL_RTR3PTR;
5181#if !defined(VBOX_WITH_INTNET_SERVICE_IN_R3) || !defined(IN_RING3)
5182 pIf->hRecvEvent = NIL_RTSEMEVENT;
5183#else
5184 pIf->pfnRecvAvail = pfnRecvAvail;
5185 pIf->pvUserRecvAvail = pvUser;
5186#endif
5187 //pIf->cSleepers = 0;
5188 pIf->hIf = INTNET_HANDLE_INVALID;
5189 pIf->hDestructorThread = NIL_RTNATIVETHREAD;
5190 pIf->pNetwork = pNetwork;
5191 pIf->pSession = pSession;
5192 //pIf->pvObj = NULL;
5193 //pIf->aAddrCache = {0};
5194 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
5195 pIf->cBusy = 0;
5196 //pIf->pDstTab = NULL;
5197 //pIf->pvIfData = NULL;
5198
5199 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End && RT_SUCCESS(rc); i++)
5200 rc = intnetR0IfAddrCacheInit(&pIf->aAddrCache[i], (INTNETADDRTYPE)i,
5201 !!(pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE));
5202 if (RT_SUCCESS(rc))
5203 rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, (PINTNETDSTTAB *)&pIf->pDstTab);
5204#if !defined(VBOX_WITH_INTNET_SERVICE_IN_R3) || !defined(IN_RING3)
5205 if (RT_SUCCESS(rc))
5206 rc = RTSemEventCreate((PRTSEMEVENT)&pIf->hRecvEvent);
5207#endif
5208 if (RT_SUCCESS(rc))
5209 rc = RTSpinlockCreate(&pIf->hRecvInSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hRecvInSpinlock");
5210 if (RT_SUCCESS(rc))
5211 {
5212 /*
5213 * Create the default buffer.
5214 */
5215 /** @todo adjust with minimums and apply defaults here. */
5216 cbRecv = RT_ALIGN(RT_MAX(cbRecv, sizeof(INTNETHDR) * 4), INTNETRINGBUF_ALIGNMENT);
5217 cbSend = RT_ALIGN(RT_MAX(cbSend, sizeof(INTNETHDR) * 4), INTNETRINGBUF_ALIGNMENT);
5218 const unsigned cbBuf = RT_ALIGN(sizeof(*pIf->pIntBuf), INTNETRINGBUF_ALIGNMENT) + cbRecv + cbSend;
5219 rc = SUPR0MemAlloc(pIf->pSession, cbBuf, (PRTR0PTR)&pIf->pIntBufDefault, (PRTR3PTR)&pIf->pIntBufDefaultR3);
5220 if (RT_SUCCESS(rc))
5221 {
5222 RT_BZERO(pIf->pIntBufDefault, cbBuf); /** @todo I thought I specified these buggers as clearing the memory... */
5223
5224 pIf->pIntBuf = pIf->pIntBufDefault;
5225 pIf->pIntBufR3 = pIf->pIntBufDefaultR3;
5226 IntNetBufInit(pIf->pIntBuf, cbBuf, cbRecv, cbSend);
5227
5228 /*
5229 * Register the interface with the session and create a handle for it.
5230 */
5231 pIf->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_INTERNAL_NETWORK_INTERFACE,
5232 intnetR0IfDestruct, pIf, pNetwork->pIntNet);
5233 if (pIf->pvObj)
5234 {
5235 rc = RTHandleTableAllocWithCtx(pNetwork->pIntNet->hHtIfs, pIf, pSession, (uint32_t *)&pIf->hIf);
5236 if (RT_SUCCESS(rc))
5237 {
5238 /*
5239 * Finally add the interface to the network, consuming the
5240 * network reference of the caller.
5241 */
5242 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5243
5244 uint32_t iIf = pNetwork->MacTab.cEntries;
5245 Assert(iIf + 1 <= pNetwork->MacTab.cEntriesAllocated);
5246
5247 pNetwork->MacTab.paEntries[iIf].MacAddr = pIf->MacAddr;
5248 pNetwork->MacTab.paEntries[iIf].fActive = false;
5249 pNetwork->MacTab.paEntries[iIf].fPromiscuousEff = false;
5250 pNetwork->MacTab.paEntries[iIf].fPromiscuousSeeTrunk = false;
5251 pNetwork->MacTab.paEntries[iIf].pIf = pIf;
5252
5253 pNetwork->MacTab.cEntries = iIf + 1;
5254 pIf->pNetwork = pNetwork;
5255
5256 /*
5257 * Grab a busy reference (paranoia) to the trunk before releasing
5258 * the spinlock and then notify it about the new interface.
5259 */
5260 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
5261 if (pTrunk)
5262 intnetR0BusyIncTrunk(pTrunk);
5263
5264 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5265
5266 if (pTrunk)
5267 {
5268 Log(("intnetR0NetworkCreateIf: pfnConnectInterface hIf=%RX32\n", pIf->hIf));
5269 if (pTrunk->pIfPort)
5270 rc = pTrunk->pIfPort->pfnConnectInterface(pTrunk->pIfPort, pIf, &pIf->pvIfData);
5271 intnetR0BusyDecTrunk(pTrunk);
5272 }
5273 if (RT_SUCCESS(rc))
5274 {
5275 /*
5276 * We're good!
5277 */
5278 *phIf = pIf->hIf;
5279 Log(("intnetR0NetworkCreateIf: returns VINF_SUCCESS *phIf=%RX32 cbSend=%u cbRecv=%u cbBuf=%u\n",
5280 *phIf, pIf->pIntBufDefault->cbSend, pIf->pIntBufDefault->cbRecv, pIf->pIntBufDefault->cbBuf));
5281 return VINF_SUCCESS;
5282 }
5283 }
5284
5285 SUPR0ObjAddRef(pNetwork->pvObj, pSession);
5286 SUPR0ObjRelease(pIf->pvObj, pSession);
5287 LogFlow(("intnetR0NetworkCreateIf: returns %Rrc\n", rc));
5288 return rc;
5289 }
5290
5291 /* clean up */
5292 SUPR0MemFree(pIf->pSession, (RTHCUINTPTR)pIf->pIntBufDefault);
5293 pIf->pIntBufDefault = NULL;
5294 pIf->pIntBuf = NULL;
5295 }
5296 }
5297
5298 RTSpinlockDestroy(pIf->hRecvInSpinlock);
5299 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
5300#if !defined(VBOX_WITH_INTNET_SERVICE_IN_R3) || !defined(IN_RING3)
5301 RTSemEventDestroy(pIf->hRecvEvent);
5302 pIf->hRecvEvent = NIL_RTSEMEVENT;
5303#else
5304 pIf->pfnRecvAvail = NULL;
5305 pIf->pvUserRecvAvail = NULL;
5306#endif
5307 RTMemFree(pIf->pDstTab);
5308 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
5309 intnetR0IfAddrCacheDestroy(&pIf->aAddrCache[i]);
5310 RTMemFree(pIf);
5311 LogFlow(("intnetR0NetworkCreateIf: returns %Rrc\n", rc));
5312 return rc;
5313}
5314
5315
5316/** @interface_method_impl{INTNETTRUNKSWPORT,pfnSetSGPhys} */
5317static DECLCALLBACK(bool) intnetR0TrunkIfPortSetSGPhys(PINTNETTRUNKSWPORT pSwitchPort, bool fEnable)
5318{
5319 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5320 AssertMsgFailed(("Not implemented because it wasn't required on Darwin\n"));
5321 return ASMAtomicXchgBool(&pThis->fPhysSG, fEnable);
5322}
5323
5324
5325/** @interface_method_impl{INTNETTRUNKSWPORT,pfnReportMacAddress} */
5326static DECLCALLBACK(void) intnetR0TrunkIfPortReportMacAddress(PINTNETTRUNKSWPORT pSwitchPort, PCRTMAC pMacAddr)
5327{
5328 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5329
5330 /*
5331 * Get the network instance and grab the address spinlock before making
5332 * any changes.
5333 */
5334 intnetR0BusyIncTrunk(pThis);
5335 PINTNETNETWORK pNetwork = pThis->pNetwork;
5336 if (pNetwork)
5337 {
5338 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5339
5340 pNetwork->MacTab.HostMac = *pMacAddr;
5341 pThis->MacAddr = *pMacAddr;
5342
5343 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5344 }
5345 else
5346 pThis->MacAddr = *pMacAddr;
5347 intnetR0BusyDecTrunk(pThis);
5348}
5349
5350
5351/** @interface_method_impl{INTNETTRUNKSWPORT,pfnReportPromiscuousMode} */
5352static DECLCALLBACK(void) intnetR0TrunkIfPortReportPromiscuousMode(PINTNETTRUNKSWPORT pSwitchPort, bool fPromiscuous)
5353{
5354 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5355
5356 /*
5357 * Get the network instance and grab the address spinlock before making
5358 * any changes.
5359 */
5360 intnetR0BusyIncTrunk(pThis);
5361 PINTNETNETWORK pNetwork = pThis->pNetwork;
5362 if (pNetwork)
5363 {
5364 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5365
5366 pNetwork->MacTab.fHostPromiscuousReal = fPromiscuous
5367 || (pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE);
5368 pNetwork->MacTab.fHostPromiscuousEff = pNetwork->MacTab.fHostPromiscuousReal
5369 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
5370
5371 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5372 }
5373 intnetR0BusyDecTrunk(pThis);
5374}
5375
5376
5377/** @interface_method_impl{INTNETTRUNKSWPORT,pfnReportGsoCapabilities} */
5378static DECLCALLBACK(void) intnetR0TrunkIfPortReportGsoCapabilities(PINTNETTRUNKSWPORT pSwitchPort,
5379 uint32_t fGsoCapabilities, uint32_t fDst)
5380{
5381 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5382
5383 for (unsigned iBit = PDMNETWORKGSOTYPE_END; iBit < 32; iBit++)
5384 Assert(!(fGsoCapabilities & RT_BIT_32(iBit)));
5385 Assert(!(fDst & ~INTNETTRUNKDIR_VALID_MASK));
5386 Assert(fDst);
5387
5388 if (fDst & INTNETTRUNKDIR_HOST)
5389 pThis->fHostGsoCapabilites = fGsoCapabilities;
5390
5391 if (fDst & INTNETTRUNKDIR_WIRE)
5392 pThis->fWireGsoCapabilites = fGsoCapabilities;
5393}
5394
5395
5396/** @interface_method_impl{INTNETTRUNKSWPORT,pfnReportNoPreemptDsts} */
5397static DECLCALLBACK(void) intnetR0TrunkIfPortReportNoPreemptDsts(PINTNETTRUNKSWPORT pSwitchPort, uint32_t fNoPreemptDsts)
5398{
5399 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5400 Assert(!(fNoPreemptDsts & ~INTNETTRUNKDIR_VALID_MASK));
5401
5402 pThis->fNoPreemptDsts = fNoPreemptDsts;
5403}
5404
5405
5406/** @interface_method_impl{INTNETTRUNKSWPORT,pfnDisconnect} */
5407static DECLCALLBACK(void) intnetR0TrunkIfPortDisconnect(PINTNETTRUNKSWPORT pSwitchPort, PINTNETTRUNKIFPORT pIfPort,
5408 PFNINTNETTRUNKIFPORTRELEASEBUSY pfnReleaseBusy)
5409{
5410 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5411
5412 /*
5413 * The caller has marked the trunk instance busy on his side before making
5414 * the call (see method docs) to let us safely grab the network and internal
5415 * network instance pointers without racing the network destruction code
5416 * (intnetR0TrunkIfDestroy (called by intnetR0TrunkIfDestroy) will wait for
5417 * the interface to stop being busy before setting pNetwork to NULL and
5418 * freeing up the resources).
5419 */
5420 PINTNETNETWORK pNetwork = pThis->pNetwork;
5421 if (pNetwork)
5422 {
5423 PINTNET pIntNet = pNetwork->pIntNet;
5424 Assert(pNetwork->pIntNet);
5425
5426 /*
5427 * We must decrease the callers busy count here to prevent deadlocking
5428 * when requesting the big mutex ownership. This will of course
5429 * unblock anyone stuck in intnetR0TrunkIfDestroy doing pfnWaitForIdle
5430 * (the other deadlock party), so we have to revalidate the network
5431 * pointer after taking ownership of the big mutex.
5432 */
5433 if (pfnReleaseBusy)
5434 pfnReleaseBusy(pIfPort);
5435
5436 RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
5437
5438 if (intnetR0NetworkIsValid(pIntNet, pNetwork))
5439 {
5440 Assert(pNetwork->MacTab.pTrunk == pThis); /* Must be valid as long as tehre are no concurrent calls to this method. */
5441 Assert(pThis->pIfPort == pIfPort); /* Ditto */
5442
5443 /*
5444 * Disconnect the trunk and destroy it, similar to what is done int
5445 * intnetR0NetworkDestruct.
5446 */
5447 pIfPort->pfnSetState(pIfPort, INTNETTRUNKIFSTATE_DISCONNECTING);
5448
5449 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5450 pNetwork->MacTab.pTrunk = NULL;
5451 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5452
5453 /*
5454 * Create a system thread that will attempt to re-connect this trunk periodically
5455 * hoping that the corresponding filter module reappears in the system. The thread
5456 * will go away if it succeeds in re-connecting the trunk or if it is signalled.
5457 */
5458 int rc = RTThreadCreate(&pNetwork->hTrunkReconnectThread, intnetR0TrunkReconnectThread, pNetwork,
5459 0, RTTHREADTYPE_INFREQUENT_POLLER, RTTHREADFLAGS_WAITABLE, "TRNKRECON");
5460 AssertRC(rc);
5461
5462 intnetR0TrunkIfDestroy(pThis, pNetwork);
5463 }
5464
5465 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
5466 }
5467 /*
5468 * We must always release the busy reference.
5469 */
5470 else if (pfnReleaseBusy)
5471 pfnReleaseBusy(pIfPort);
5472}
5473
5474
5475/** @interface_method_impl{INTNETTRUNKSWPORT,pfnPreRecv} */
5476static DECLCALLBACK(INTNETSWDECISION) intnetR0TrunkIfPortPreRecv(PINTNETTRUNKSWPORT pSwitchPort,
5477 void const *pvSrc, size_t cbSrc, uint32_t fSrc)
5478{
5479 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5480
5481 /* assert some sanity */
5482 AssertPtr(pvSrc);
5483 AssertReturn(cbSrc >= 6, INTNETSWDECISION_BROADCAST);
5484 Assert(fSrc);
5485
5486 /*
5487 * Mark the trunk as busy, make sure we've got a network and that there are
5488 * some active interfaces around.
5489 */
5490 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_TRUNK;
5491 intnetR0BusyIncTrunk(pThis);
5492 PINTNETNETWORK pNetwork = pThis->pNetwork;
5493 if (RT_LIKELY( pNetwork
5494 && pNetwork->cActiveIFs > 0 ))
5495 {
5496 /*
5497 * Lazy bird! No pre-switching of multicast and shared-MAC-on-wire.
5498 */
5499 PCRTNETETHERHDR pEthHdr = (PCRTNETETHERHDR)pvSrc;
5500 if (intnetR0IsMacAddrMulticast(&pEthHdr->DstMac))
5501 enmSwDecision = INTNETSWDECISION_BROADCAST;
5502 else if ( fSrc == INTNETTRUNKDIR_WIRE
5503 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE))
5504 enmSwDecision = INTNETSWDECISION_BROADCAST;
5505 else
5506 enmSwDecision = intnetR0NetworkPreSwitchUnicast(pNetwork,
5507 fSrc,
5508 cbSrc >= 12 ? &pEthHdr->SrcMac : NULL,
5509 &pEthHdr->DstMac);
5510 }
5511
5512 intnetR0BusyDecTrunk(pThis);
5513 return enmSwDecision;
5514}
5515
5516
5517/** @interface_method_impl{INTNETTRUNKSWPORT,pfnRecv} */
5518static DECLCALLBACK(bool) intnetR0TrunkIfPortRecv(PINTNETTRUNKSWPORT pSwitchPort, void *pvIf, PINTNETSG pSG, uint32_t fSrc)
5519{
5520 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5521
5522 /* assert some sanity */
5523 AssertPtr(pSG);
5524 Assert(fSrc);
5525 NOREF(pvIf); /* later */
5526
5527 /*
5528 * Mark the trunk as busy, make sure we've got a network and that there are
5529 * some active interfaces around.
5530 */
5531 bool fRc = false /* don't drop it */;
5532 intnetR0BusyIncTrunk(pThis);
5533 PINTNETNETWORK pNetwork = pThis->pNetwork;
5534 if (RT_LIKELY( pNetwork
5535 && pNetwork->cActiveIFs > 0 ))
5536 {
5537 /*
5538 * Grab or allocate a destination table.
5539 */
5540 bool const fIntCtx = RTThreadPreemptIsEnabled(NIL_RTTHREAD) || RTThreadIsInInterrupt(NIL_RTTHREAD);
5541 unsigned iDstTab = 0;
5542 PINTNETDSTTAB pDstTab = NULL;
5543 RTSpinlockAcquire(pThis->hDstTabSpinlock);
5544 if (fIntCtx)
5545 {
5546 /* Interrupt or restricted context. */
5547 iDstTab = RTMpCpuIdToSetIndex(RTMpCpuId());
5548 iDstTab %= pThis->cIntDstTabs;
5549 pDstTab = pThis->apIntDstTabs[iDstTab];
5550 if (RT_LIKELY(pDstTab))
5551 pThis->apIntDstTabs[iDstTab] = NULL;
5552 else
5553 {
5554 iDstTab = pThis->cIntDstTabs;
5555 while (iDstTab-- > 0)
5556 {
5557 pDstTab = pThis->apIntDstTabs[iDstTab];
5558 if (pDstTab)
5559 {
5560 pThis->apIntDstTabs[iDstTab] = NULL;
5561 break;
5562 }
5563 }
5564 }
5565 RTSpinlockRelease(pThis->hDstTabSpinlock);
5566 Assert(!pDstTab || iDstTab < pThis->cIntDstTabs);
5567 }
5568 else
5569 {
5570 /*