VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/SrvIntNetR0.cpp@ 33000

Last change on this file since 33000 was 31323, checked in by vboxsync, 14 years ago

SrvIntNetR0.cpp: Fixed lock order inversion in IntNetR0IfSetActive, must take the mutex before grabbing a (paranoid) busy reference.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 198.2 KB
Line 
1/* $Id: SrvIntNetR0.cpp 31323 2010-08-02 18:18:22Z vboxsync $ */
2/** @file
3 * Internal networking - The ring 0 service.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_SRV_INTNET
23#include <VBox/intnet.h>
24#include <VBox/intnetinline.h>
25#include <VBox/pdmnetinline.h>
26#include <VBox/sup.h>
27#include <VBox/pdm.h>
28#include <VBox/log.h>
29
30#include <iprt/asm.h>
31#include <iprt/assert.h>
32#include <iprt/handletable.h>
33#include <iprt/mp.h>
34#include <iprt/mem.h>
35#include <iprt/net.h>
36#include <iprt/semaphore.h>
37#include <iprt/spinlock.h>
38#include <iprt/string.h>
39#include <iprt/thread.h>
40#include <iprt/time.h>
41
42
43/*******************************************************************************
44* Defined Constants And Macros *
45*******************************************************************************/
/** @def INTNET_WITH_DHCP_SNOOPING
 * Enables DHCP snooping when in shared-mac-on-the-wire mode. */
#define INTNET_WITH_DHCP_SNOOPING

/** Upper limit on the number of interfaces in a network. */
#define INTNET_MAX_IFS                  (1023 + 1 + 16)

/** How many entries the destination tables are grown by at a time. */
#if 0
# define INTNET_GROW_DSTTAB_SIZE        16
#else
# define INTNET_GROW_DSTTAB_SIZE        1
#endif

/** The wakeup bit in the INTNETIF::cBusy and INTNETTRUNKIF::cBusy counters. */
#define INTNET_BUSY_WAKEUP_MASK         RT_BIT_32(30)
62
63
64/*******************************************************************************
65* Structures and Typedefs *
66*******************************************************************************/
67/**
68 * MAC address lookup table entry.
69 */
70typedef struct INTNETMACTABENTRY
71{
72 /** The MAC address of this entry. */
73 RTMAC MacAddr;
74 /** Is it promiscuous. */
75 bool fPromiscuous;
76 /** Is it active.
77 * We ignore the entry if this is clear and may end up sending packets addressed
78 * to this interface onto the trunk. The reasoning for this is that this could
79 * be the interface of a VM that just has been teleported to a different host. */
80 bool fActive;
81 /** Pointer to the network interface. */
82 struct INTNETIF *pIf;
83} INTNETMACTABENTRY;
84/** Pointer to a MAC address lookup table entry. */
85typedef INTNETMACTABENTRY *PINTNETMACTABENTRY;
86
87/**
88 * MAC address lookup table.
89 *
90 * @todo Having this in a separate structure didn't work out as well as it
91 * should. Consider merging it into INTNETNETWORK.
92 */
93typedef struct INTNETMACTAB
94{
95 /** The current number of entries. */
96 uint32_t cEntries;
97 /** The number of entries we've allocated space for. */
98 uint32_t cEntriesAllocated;
99 /** Table entries. */
100 PINTNETMACTABENTRY paEntries;
101
102 /** The host MAC address (reported). */
103 RTMAC HostMac;
104 /** The host promisucous setting (reported). */
105 bool fHostPromiscuous;
106 /** Whether the host is active. */
107 bool fHostActive;
108
109 /** Whether the wire is promiscuous (config). */
110 bool fWirePromiscuous;
111 /** Whether the wire is active. */
112 bool fWireActive;
113
114 /** Pointer to the the trunk interface. */
115 struct INTNETTRUNKIF *pTrunk;
116} INTNETMACTAB;
117/** Pointer to a MAC address . */
118typedef INTNETMACTAB *PINTNETMACTAB;
119
/**
 * A destination table: the set of receivers selected for a frame.
 */
typedef struct INTNETDSTTAB
{
    /** The trunk destinations. */
    uint32_t                fTrunkDst;
    /** The trunk interface (referenced); set when fTrunkDst is non-zero. */
    struct INTNETTRUNKIF   *pTrunk;
    /** Number of destination interfaces in aIfs. */
    uint32_t                cIfs;
    /** The destination interfaces (referenced).  Variable sized array. */
    struct
    {
        /** The destination interface. */
        struct INTNETIF    *pIf;
        /** Whether the destination MAC address must be replaced.
         * Used when the MAC address is shared with the host on the
         * wire(less). */
        bool                fReplaceDstMac;
    } aIfs[1];
} INTNETDSTTAB;
/** Pointer to a destination table. */
typedef INTNETDSTTAB *PINTNETDSTTAB;
/** Pointer to a const destination table. */
typedef INTNETDSTTAB const *PCINTNETDSTTAB;
145
146
/**
 * Network layer address type.
 *
 * The values are spelled out because they double as array indexes
 * (INTNETIF::aAddrCache is sized by kIntNetAddrType_End).
 */
typedef enum INTNETADDRTYPE
{
    /** The invalid 0 entry. */
    kIntNetAddrType_Invalid     = 0,
    /** IP version 4. */
    kIntNetAddrType_IPv4        = 1,
    /** IP version 6. */
    kIntNetAddrType_IPv6        = 2,
    /** IPX. */
    kIntNetAddrType_IPX         = 3,
    /** One past the last valid value. */
    kIntNetAddrType_End         = 4,
    /** The usual 32-bit hack. */
    kIntNetAddrType_32BitHack   = 0x7fffffff
} INTNETADDRTYPE;
/** Pointer to a network layer address type. */
typedef INTNETADDRTYPE *PINTNETADDRTYPE;
165
166
167/**
168 * Address and type.
169 */
170typedef struct INTNETADDR
171{
172 /** The address type. */
173 INTNETADDRTYPE enmType;
174 /** The address. */
175 RTNETADDRU Addr;
176} INTNETADDR;
177/** Pointer to an address. */
178typedef INTNETADDR *PINTNETADDR;
179/** Pointer to a const address. */
180typedef INTNETADDR const *PCINTNETADDR;
181
182
/**
 * Per network layer address cache.
 */
typedef struct INTNETADDRCACHE
{
    /** The address table. */
    uint8_t    *pbEntries;
    /** How many entries in the table are valid. */
    uint8_t     cEntries;
    /** How many entries the table has room for. */
    uint8_t     cEntriesAlloc;
    /** Size of an address. */
    uint8_t     cbAddress;
    /** Size of a table entry. */
    uint8_t     cbEntry;
} INTNETADDRCACHE;
/** Pointer to an address cache. */
typedef INTNETADDRCACHE *PINTNETADDRCACHE;
/** Pointer to a const address cache. */
typedef INTNETADDRCACHE const *PCINTNETADDRCACHE;
203
204
205/**
206 * A network interface.
207 *
208 * Unless explicitly stated, all members are protect by the network semaphore.
209 */
210typedef struct INTNETIF
211{
212 /** The MAC address.
213 * This is shadowed by INTNETMACTABENTRY::MacAddr. */
214 RTMAC MacAddr;
215 /** Set if the INTNET::MacAddr member has been explicitly set. */
216 bool fMacSet;
217 /** Set if the interface is in promiscuous mode.
218 * This is shadowed by INTNETMACTABENTRY::fPromiscuous. */
219 bool fPromiscuous;
220 /** Whether the interface is active or not.
221 * This is shadowed by INTNETMACTABENTRY::fActive. */
222 bool fActive;
223 /** Whether someone is currently in the destructor or has indicated that
224 * the end is nigh by means of IntNetR0IfAbortWait. */
225 bool volatile fDestroying;
226 /** Number of yields done to try make the interface read pending data.
227 * We will stop yielding when this reaches a threshold assuming that the VM is
228 * paused or that it simply isn't worth all the delay. It is cleared when a
229 * successful send has been done. */
230 uint32_t cYields;
231 /** Pointer to the current exchange buffer (ring-0). */
232 PINTNETBUF pIntBuf;
233 /** Pointer to ring-3 mapping of the current exchange buffer. */
234 R3PTRTYPE(PINTNETBUF) pIntBufR3;
235 /** Pointer to the default exchange buffer for the interface. */
236 PINTNETBUF pIntBufDefault;
237 /** Pointer to ring-3 mapping of the default exchange buffer. */
238 R3PTRTYPE(PINTNETBUF) pIntBufDefaultR3;
239 /** Event semaphore which a receiver/consumer thread will sleep on while
240 * waiting for data to arrive. */
241 RTSEMEVENT volatile hRecvEvent;
242 /** Number of threads sleeping on the event semaphore. */
243 uint32_t cSleepers;
244 /** The interface handle.
245 * When this is INTNET_HANDLE_INVALID a sleeper which is waking up
246 * should return with the appropriate error condition. */
247 INTNETIFHANDLE volatile hIf;
248 /** Pointer to the network this interface is connected to.
249 * This is protected by the INTNET::hMtxCreateOpenDestroy. */
250 struct INTNETNETWORK *pNetwork;
251 /** The session this interface is associated with. */
252 PSUPDRVSESSION pSession;
253 /** The SUPR0 object id. */
254 void *pvObj;
255 /** The network layer address cache. (Indexed by type, 0 entry isn't used.)
256 * This is protected by the address spinlock of the network. */
257 INTNETADDRCACHE aAddrCache[kIntNetAddrType_End];
258 /** Spinlock protecting the input (producer) side of the receive ring. */
259 RTSPINLOCK hRecvInSpinlock;
260 /** Busy count for tracking destination table references and active sends.
261 * Usually incremented while owning the switch table spinlock. The 30th bit
262 * is used to indicate wakeup. */
263 uint32_t volatile cBusy;
264 /** The preallocated destination table.
265 * This is NULL when it's in use as a precaution against unserialized
266 * transmitting. This is grown when new interfaces are added to the network. */
267 PINTNETDSTTAB volatile pDstTab;
268 /** Pointer to the trunk's per interface data. Can be NULL. */
269 void *pvIfData;
270} INTNETIF;
271/** Pointer to an internal network interface. */
272typedef INTNETIF *PINTNETIF;
273
274
275/**
276 * A trunk interface.
277 */
278typedef struct INTNETTRUNKIF
279{
280 /** The port interface we present to the component. */
281 INTNETTRUNKSWPORT SwitchPort;
282 /** The port interface we get from the component. */
283 PINTNETTRUNKIFPORT pIfPort;
284 /** Pointer to the network we're connect to.
285 * This may be NULL if we're orphaned? */
286 struct INTNETNETWORK *pNetwork;
287 /** The current MAC address for the interface. (reported)
288 * Updated while owning the switch table spinlock. */
289 RTMAC MacAddr;
290 /** Whether to supply physical addresses with the outbound SGs. (reported) */
291 bool fPhysSG;
292 /** Explicit alignment. */
293 bool fUnused;
294 /** Busy count for tracking destination table references and active sends.
295 * Usually incremented while owning the switch table spinlock. The 30th bit
296 * is used to indicate wakeup. */
297 uint32_t volatile cBusy;
298 /** Mask of destinations that pfnXmit cope with disabled preemption for. */
299 uint32_t fNoPreemptDsts;
300 /** The GSO capabilities of the wire destination. (reported) */
301 uint32_t fWireGsoCapabilites;
302 /** The GSO capabilities of the host destination. (reported)
303 * This is as bit map where each bit represents the GSO type with the same
304 * number. */
305 uint32_t fHostGsoCapabilites;
306 /** Header buffer for when we're carving GSO frames. */
307 uint8_t abGsoHdrs[256];
308 /** The destination table spinlock, interrupt safe.
309 * Protects apTaskDstTabs and apIntDstTabs. */
310 RTSPINLOCK hDstTabSpinlock;
311 /** The number of entries in apIntDstTabs. */
312 uint32_t cIntDstTabs;
313 /** The task time destination tables.
314 * @remarks intnetR0NetworkEnsureTabSpace and others ASSUMES this immediately
315 * preceeds apIntDstTabs so that these two tables can be used as one
316 * contiguous one. */
317 PINTNETDSTTAB apTaskDstTabs[2];
318 /** The interrupt / disabled-preemption time destination tables.
319 * This is a variable sized array. */
320 PINTNETDSTTAB apIntDstTabs[1];
321} INTNETTRUNKIF;
322/** Pointer to a trunk interface. */
323typedef INTNETTRUNKIF *PINTNETTRUNKIF;
324
325/** Converts a pointer to INTNETTRUNKIF::SwitchPort to a PINTNETTRUNKIF. */
326#define INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort) ((PINTNETTRUNKIF)(pSwitchPort))
327
328
329/**
330 * Internal representation of a network.
331 */
332typedef struct INTNETNETWORK
333{
334 /** The Next network in the chain.
335 * This is protected by the INTNET::hMtxCreateOpenDestroy. */
336 struct INTNETNETWORK *pNext;
337
338 /** The spinlock protecting MacTab and INTNETTRUNKIF::aAddrCache.
339 * Interrupt safe. */
340 RTSPINLOCK hAddrSpinlock;
341 /** MAC address table.
342 * This doubles as interface collection. */
343 INTNETMACTAB MacTab;
344
345 /** Wait for an interface to stop being busy so it can be removed or have its
346 * destination table replaced. We have to wait upon this while owning the
347 * network mutex. Will only ever have one waiter because of the big mutex. */
348 RTSEMEVENT hEvtBusyIf;
349 /** Pointer to the instance data. */
350 struct INTNET *pIntNet;
351 /** The SUPR0 object id. */
352 void *pvObj;
353 /** Pointer to the temporary buffer that is used when snooping fragmented packets.
354 * This is allocated after this structure if we're sharing the MAC address with
355 * the host. The buffer is INTNETNETWORK_TMP_SIZE big and aligned on a 64-byte boundrary. */
356 uint8_t *pbTmp;
357 /** Network creation flags (INTNET_OPEN_FLAGS_*). */
358 uint32_t fFlags;
359 /** The number of active interfaces (excluding the trunk). */
360 uint32_t cActiveIFs;
361 /** The length of the network name. */
362 uint8_t cchName;
363 /** The network name. */
364 char szName[INTNET_MAX_NETWORK_NAME];
365 /** The trunk type. */
366 INTNETTRUNKTYPE enmTrunkType;
367 /** The trunk name. */
368 char szTrunk[INTNET_MAX_TRUNK_NAME];
369} INTNETNETWORK;
370/** Pointer to an internal network. */
371typedef INTNETNETWORK *PINTNETNETWORK;
372
373/** The size of the buffer INTNETNETWORK::pbTmp points at. */
374#define INTNETNETWORK_TMP_SIZE 2048
375
376
377/**
378 * Internal networking instance.
379 */
380typedef struct INTNET
381{
382 /** Magic number (INTNET_MAGIC). */
383 uint32_t volatile u32Magic;
384 /** Mutex protecting the creation, opening and destruction of both networks and
385 * interfaces. (This means all operations affecting the pNetworks list.) */
386 RTSEMMUTEX hMtxCreateOpenDestroy;
387 /** List of networks. Protected by INTNET::Spinlock. */
388 PINTNETNETWORK volatile pNetworks;
389 /** Handle table for the interfaces. */
390 RTHANDLETABLE hHtIfs;
391} INTNET;
392/** Pointer to an internal network ring-0 instance. */
393typedef struct INTNET *PINTNET;
394
395/** Magic number for the internal network instance data (Hayao Miyazaki). */
396#define INTNET_MAGIC UINT32_C(0x19410105)
397
398
399/*******************************************************************************
400* Global Variables *
401*******************************************************************************/
402/** Pointer to the internal network instance data. */
403static PINTNET volatile g_pIntNet = NULL;
404
405
406/*******************************************************************************
407* Internal Functions *
408*******************************************************************************/
409static PINTNETTRUNKIF intnetR0TrunkIfRetain(PINTNETTRUNKIF pThis);
410static void intnetR0TrunkIfRelease(PINTNETTRUNKIF pThis);
411
412
413/**
414 * Worker for intnetR0SgWritePart that deals with the case where the
415 * request doesn't fit into the first segment.
416 *
417 * @returns true, unless the request or SG invalid.
418 * @param pSG The SG list to write to.
419 * @param off Where to start writing (offset into the SG).
420 * @param cb How much to write.
421 * @param pvBuf The buffer to containing the bits to write.
422 */
423static bool intnetR0SgWritePartSlow(PCINTNETSG pSG, uint32_t off, uint32_t cb, void const *pvBuf)
424{
425 if (RT_UNLIKELY(off + cb > pSG->cbTotal))
426 return false;
427
428 /*
429 * Skip ahead to the segment where off starts.
430 */
431 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
432 unsigned iSeg = 0;
433 while (off > pSG->aSegs[iSeg].cb)
434 {
435 off -= pSG->aSegs[iSeg++].cb;
436 AssertReturn(iSeg < cSegs, false);
437 }
438
439 /*
440 * Copy the data, hoping that it's all from one segment...
441 */
442 uint32_t cbCanCopy = pSG->aSegs[iSeg].cb - off;
443 if (cbCanCopy >= cb)
444 memcpy((uint8_t *)pSG->aSegs[iSeg].pv + off, pvBuf, cb);
445 else
446 {
447 /* copy the portion in the current segment. */
448 memcpy((uint8_t *)pSG->aSegs[iSeg].pv + off, pvBuf, cbCanCopy);
449 cb -= cbCanCopy;
450
451 /* copy the portions in the other segments. */
452 do
453 {
454 pvBuf = (uint8_t const *)pvBuf + cbCanCopy;
455 iSeg++;
456 AssertReturn(iSeg < cSegs, false);
457
458 cbCanCopy = RT_MIN(cb, pSG->aSegs[iSeg].cb);
459 memcpy(pSG->aSegs[iSeg].pv, pvBuf, cbCanCopy);
460
461 cb -= cbCanCopy;
462 } while (cb > 0);
463 }
464
465 return true;
466}
467
468
469/**
470 * Writes to a part of an SG.
471 *
472 * @returns true on success, false on failure (out of bounds).
473 * @param pSG The SG list to write to.
474 * @param off Where to start writing (offset into the SG).
475 * @param cb How much to write.
476 * @param pvBuf The buffer to containing the bits to write.
477 */
478DECLINLINE(bool) intnetR0SgWritePart(PCINTNETSG pSG, uint32_t off, uint32_t cb, void const *pvBuf)
479{
480 Assert(off + cb > off);
481
482 /* The optimized case. */
483 if (RT_LIKELY( pSG->cSegsUsed == 1
484 || pSG->aSegs[0].cb >= off + cb))
485 {
486 Assert(pSG->cbTotal == pSG->aSegs[0].cb);
487 memcpy((uint8_t *)pSG->aSegs[0].pv + off, pvBuf, cb);
488 return true;
489 }
490 return intnetR0SgWritePartSlow(pSG, off, cb, pvBuf);
491}
492
493
494/**
495 * Reads a byte from a SG list.
496 *
497 * @returns The byte on success. 0xff on failure.
498 * @param pSG The SG list to read.
499 * @param off The offset (into the SG) off the byte.
500 */
501DECLINLINE(uint8_t) intnetR0SgReadByte(PCINTNETSG pSG, uint32_t off)
502{
503 if (RT_LIKELY(pSG->aSegs[0].cb > off))
504 return ((uint8_t const *)pSG->aSegs[0].pv)[off];
505
506 off -= pSG->aSegs[0].cb;
507 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
508 for (unsigned iSeg = 1; iSeg < cSegs; iSeg++)
509 {
510 if (pSG->aSegs[iSeg].cb > off)
511 return ((uint8_t const *)pSG->aSegs[iSeg].pv)[off];
512 off -= pSG->aSegs[iSeg].cb;
513 }
514 return false;
515}
516
517
518/**
519 * Worker for intnetR0SgReadPart that deals with the case where the
520 * requested data isn't in the first segment.
521 *
522 * @returns true, unless the SG is invalid.
523 * @param pSG The SG list to read.
524 * @param off Where to start reading (offset into the SG).
525 * @param cb How much to read.
526 * @param pvBuf The buffer to read into.
527 */
528static bool intnetR0SgReadPartSlow(PCINTNETSG pSG, uint32_t off, uint32_t cb, void *pvBuf)
529{
530 if (RT_UNLIKELY(off + cb > pSG->cbTotal))
531 return false;
532
533 /*
534 * Skip ahead to the segment where off starts.
535 */
536 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
537 unsigned iSeg = 0;
538 while (off > pSG->aSegs[iSeg].cb)
539 {
540 off -= pSG->aSegs[iSeg++].cb;
541 AssertReturn(iSeg < cSegs, false);
542 }
543
544 /*
545 * Copy the data, hoping that it's all from one segment...
546 */
547 uint32_t cbCanCopy = pSG->aSegs[iSeg].cb - off;
548 if (cbCanCopy >= cb)
549 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv + off, cb);
550 else
551 {
552 /* copy the portion in the current segment. */
553 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv + off, cbCanCopy);
554 cb -= cbCanCopy;
555
556 /* copy the portions in the other segments. */
557 do
558 {
559 pvBuf = (uint8_t *)pvBuf + cbCanCopy;
560 iSeg++;
561 AssertReturn(iSeg < cSegs, false);
562
563 cbCanCopy = RT_MIN(cb, pSG->aSegs[iSeg].cb);
564 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv, cbCanCopy);
565
566 cb -= cbCanCopy;
567 } while (cb > 0);
568 }
569
570 return true;
571}
572
573
574/**
575 * Reads a part of an SG into a buffer.
576 *
577 * @returns true on success, false on failure (out of bounds).
578 * @param pSG The SG list to read.
579 * @param off Where to start reading (offset into the SG).
580 * @param cb How much to read.
581 * @param pvBuf The buffer to read into.
582 */
583DECLINLINE(bool) intnetR0SgReadPart(PCINTNETSG pSG, uint32_t off, uint32_t cb, void *pvBuf)
584{
585 Assert(off + cb > off);
586
587 /* The optimized case. */
588 if (RT_LIKELY( pSG->cSegsUsed == 1
589 || pSG->aSegs[0].cb >= off + cb))
590 {
591 Assert(pSG->cbTotal == pSG->aSegs[0].cb);
592 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[0].pv + off, cb);
593 return true;
594 }
595 return intnetR0SgReadPartSlow(pSG, off, cb, pvBuf);
596}
597
598
599/**
600 * Wait for a busy counter to reach zero.
601 *
602 * @param pNetwork The network.
603 * @param pcBusy The busy counter.
604 */
605static void intnetR0BusyWait(PINTNETNETWORK pNetwork, uint32_t volatile *pcBusy)
606{
607 if (ASMAtomicReadU32(pcBusy) == 0)
608 return;
609
610 /*
611 * We have to be a bit cautious here so we don't destroy the network or the
612 * semaphore before intnetR0BusyDec has signalled us.
613 */
614
615 /* Reset the semaphore and flip the wakeup bit. */
616 RTSemEventWait(pNetwork->hEvtBusyIf, 0); /* clear it */
617 uint32_t cCurBusy = ASMAtomicReadU32(pcBusy);
618 do
619 {
620 if (cCurBusy == 0)
621 return;
622 AssertMsg(!(cCurBusy & INTNET_BUSY_WAKEUP_MASK), ("%#x\n", cCurBusy));
623 AssertMsg((cCurBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cCurBusy));
624 } while (!ASMAtomicCmpXchgExU32(pcBusy, cCurBusy | INTNET_BUSY_WAKEUP_MASK, cCurBusy, &cCurBusy));
625
626 /* Wait for the count to reach zero. */
627 do
628 {
629 int rc2 = RTSemEventWait(pNetwork->hEvtBusyIf, 30000); NOREF(rc2);
630 //AssertMsg(RT_SUCCESS(rc2), ("rc=%Rrc *pcBusy=%#x (%#x)\n", rc2, ASMAtomicReadU32(pcBusy), cCurBusy ));
631 cCurBusy = ASMAtomicReadU32(pcBusy);
632 AssertMsg((cCurBusy & INTNET_BUSY_WAKEUP_MASK), ("%#x\n", cCurBusy));
633 AssertMsg((cCurBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cCurBusy));
634 } while ( cCurBusy != INTNET_BUSY_WAKEUP_MASK
635 || !ASMAtomicCmpXchgU32(pcBusy, 0, INTNET_BUSY_WAKEUP_MASK));
636}
637
638
639/**
640 * Decrements the busy counter and maybe wakes up any threads waiting for it to
641 * reach zero.
642 *
643 * @param pNetwork The network.
644 * @param pcBusy The busy counter.
645 */
646DECLINLINE(void) intnetR0BusyDec(PINTNETNETWORK pNetwork, uint32_t volatile *pcBusy)
647{
648 uint32_t cNewBusy = ASMAtomicDecU32(pcBusy);
649 if (RT_UNLIKELY( cNewBusy == INTNET_BUSY_WAKEUP_MASK
650 && pNetwork))
651 RTSemEventSignal(pNetwork->hEvtBusyIf);
652 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
653}
654
655
656/**
657 * Increments the busy count of the specified interface.
658 *
659 * The caller must own the MAC address table spinlock.
660 *
661 * @param pIf The interface.
662 */
663DECLINLINE(void) intnetR0BusyDecIf(PINTNETIF pIf)
664{
665 intnetR0BusyDec(pIf->pNetwork, &pIf->cBusy);
666}
667
668
669/**
670 * Increments the busy count of the specified interface.
671 *
672 * The caller must own the MAC address table spinlock or an explicity reference.
673 *
674 * @param pTrunk The trunk.
675 */
676DECLINLINE(void) intnetR0BusyDecTrunk(PINTNETTRUNKIF pTrunk)
677{
678 intnetR0BusyDec(pTrunk->pNetwork, &pTrunk->cBusy);
679}
680
681
682/**
683 * Increments the busy count of the specified interface.
684 *
685 * The caller must own the MAC address table spinlock or an explicity reference.
686 *
687 * @param pIf The interface.
688 */
689DECLINLINE(void) intnetR0BusyIncIf(PINTNETIF pIf)
690{
691 uint32_t cNewBusy = ASMAtomicIncU32(&pIf->cBusy);
692 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
693 NOREF(cNewBusy);
694}
695
696
697/**
698 * Increments the busy count of the specified interface.
699 *
700 * The caller must own the MAC address table spinlock or an explicity reference.
701 *
702 * @param pTrunk The trunk.
703 */
704DECLINLINE(void) intnetR0BusyIncTrunk(PINTNETTRUNKIF pTrunk)
705{
706 uint32_t cNewBusy = ASMAtomicIncU32(&pTrunk->cBusy);
707 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
708 NOREF(cNewBusy);
709}
710
711
712/**
713 * Retain an interface.
714 *
715 * @returns VBox status code, can assume success in most situations.
716 * @param pIf The interface instance.
717 * @param pSession The current session.
718 */
719DECLINLINE(int) intnetR0IfRetain(PINTNETIF pIf, PSUPDRVSESSION pSession)
720{
721 int rc = SUPR0ObjAddRefEx(pIf->pvObj, pSession, true /* fNoBlocking */);
722 AssertRCReturn(rc, rc);
723 return VINF_SUCCESS;
724}
725
726
727/**
728 * Release an interface previously retained by intnetR0IfRetain or
729 * by handle lookup/freeing.
730 *
731 * @returns true if destroyed, false if not.
732 * @param pIf The interface instance.
733 * @param pSession The current session.
734 */
735DECLINLINE(bool) intnetR0IfRelease(PINTNETIF pIf, PSUPDRVSESSION pSession)
736{
737 int rc = SUPR0ObjRelease(pIf->pvObj, pSession);
738 AssertRC(rc);
739 return rc == VINF_OBJECT_DESTROYED;
740}
741
742
743/**
744 * RTHandleCreateEx callback that retains an object in the
745 * handle table before returning it.
746 *
747 * (Avoids racing the freeing of the handle.)
748 *
749 * @returns VBox status code.
750 * @param hHandleTable The handle table (ignored).
751 * @param pvObj The object (INTNETIF).
752 * @param pvCtx The context (SUPDRVSESSION).
753 * @param pvUser The user context (ignored).
754 */
755static DECLCALLBACK(int) intnetR0IfRetainHandle(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
756{
757 NOREF(pvUser);
758 NOREF(hHandleTable);
759 PINTNETIF pIf = (PINTNETIF)pvObj;
760 if (pIf->hIf != INTNET_HANDLE_INVALID) /* Don't try retain it if called from intnetR0IfDestruct. */
761 return intnetR0IfRetain(pIf, (PSUPDRVSESSION)pvCtx);
762 return VINF_SUCCESS;
763}
764
765
766
767/**
768 * Checks if the interface has a usable MAC address or not.
769 *
770 * @returns true if MacAddr is usable, false if not.
771 * @param pIf The interface.
772 */
773DECL_FORCE_INLINE(bool) intnetR0IfHasMacAddr(PINTNETIF pIf)
774{
775 return pIf->fMacSet || !(pIf->MacAddr.au8[0] & 1);
776}
777
778
779/**
780 * Locates the MAC address table entry for the given interface.
781 *
782 * The caller holds the MAC address table spinlock, obviously.
783 *
784 * @returns Pointer to the entry on if found, NULL if not.
785 * @param pNetwork The network.
786 * @param pIf The interface.
787 */
788DECLINLINE(PINTNETMACTABENTRY) intnetR0NetworkFindMacAddrEntry(PINTNETNETWORK pNetwork, PINTNETIF pIf)
789{
790 uint32_t iIf = pNetwork->MacTab.cEntries;
791 while (iIf-- > 0)
792 {
793 if (pNetwork->MacTab.paEntries[iIf].pIf == pIf)
794 return &pNetwork->MacTab.paEntries[iIf];
795 }
796 return NULL;
797}
798
799
800/**
801 * Checks if the IPv4 address is a broadcast address.
802 * @returns true/false.
803 * @param Addr The address, network endian.
804 */
805DECLINLINE(bool) intnetR0IPv4AddrIsBroadcast(RTNETADDRIPV4 Addr)
806{
807 /* Just check for 255.255.255.255 atm. */
808 return Addr.u == UINT32_MAX;
809}
810
811
812/**
813 * Checks if the IPv4 address is a good interface address.
814 * @returns true/false.
815 * @param Addr The address, network endian.
816 */
817DECLINLINE(bool) intnetR0IPv4AddrIsGood(RTNETADDRIPV4 Addr)
818{
819 /* Usual suspects. */
820 if ( Addr.u == UINT32_MAX /* 255.255.255.255 - broadcast. */
821 || Addr.au8[0] == 0) /* Current network, can be used as source address. */
822 return false;
823
824 /* Unusual suspects. */
825 if (RT_UNLIKELY( Addr.au8[0] == 127 /* Loopback */
826 || (Addr.au8[0] & 0xf0) == 224 /* Multicast */
827 ))
828 return false;
829 return true;
830}
831
832
833/**
834 * Gets the address size of a network layer type.
835 *
836 * @returns size in bytes.
837 * @param enmType The type.
838 */
839DECLINLINE(uint8_t) intnetR0AddrSize(INTNETADDRTYPE enmType)
840{
841 switch (enmType)
842 {
843 case kIntNetAddrType_IPv4: return 4;
844 case kIntNetAddrType_IPv6: return 16;
845 case kIntNetAddrType_IPX: return 4 + 6;
846 default: AssertFailedReturn(0);
847 }
848}
849
850
851/**
852 * Compares two address to see if they are equal, assuming naturally align structures.
853 *
854 * @returns true if equal, false if not.
855 * @param pAddr1 The first address.
856 * @param pAddr2 The second address.
857 * @param cbAddr The address size.
858 */
859DECLINLINE(bool) intnetR0AddrUIsEqualEx(PCRTNETADDRU pAddr1, PCRTNETADDRU pAddr2, uint8_t const cbAddr)
860{
861 switch (cbAddr)
862 {
863 case 4: /* IPv4 */
864 return pAddr1->au32[0] == pAddr2->au32[0];
865 case 16: /* IPv6 */
866 return pAddr1->au64[0] == pAddr2->au64[0]
867 && pAddr1->au64[1] == pAddr2->au64[1];
868 case 10: /* IPX */
869 return pAddr1->au64[0] == pAddr2->au64[0]
870 && pAddr1->au16[4] == pAddr2->au16[4];
871 default:
872 AssertFailedReturn(false);
873 }
874}
875
876
877/**
878 * Worker for intnetR0IfAddrCacheLookup that performs the lookup
879 * in the remaining cache entries after the caller has check the
880 * most likely ones.
881 *
882 * @returns -1 if not found, the index of the cache entry if found.
883 * @param pCache The cache.
884 * @param pAddr The address.
885 * @param cbAddr The address size (optimization).
886 */
887static int intnetR0IfAddrCacheLookupSlow(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
888{
889 unsigned i = pCache->cEntries - 2;
890 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
891 while (i >= 1)
892 {
893 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
894 return i;
895 pbEntry -= pCache->cbEntry;
896 i--;
897 }
898
899 return -1;
900}
901
902/**
903 * Lookup an address in a cache without any expectations.
904 *
905 * @returns -1 if not found, the index of the cache entry if found.
906 * @param pCache The cache.
907 * @param pAddr The address.
908 * @param cbAddr The address size (optimization).
909 */
910DECLINLINE(int) intnetR0IfAddrCacheLookup(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
911{
912 Assert(pCache->cbAddress == cbAddr);
913
914 /*
915 * The optimized case is when there is one cache entry and
916 * it doesn't match.
917 */
918 unsigned i = pCache->cEntries;
919 if ( i > 0
920 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr))
921 return 0;
922 if (i <= 1)
923 return -1;
924
925 /*
926 * Check the last entry.
927 */
928 i--;
929 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr))
930 return i;
931 if (i <= 1)
932 return -1;
933
934 return intnetR0IfAddrCacheLookupSlow(pCache, pAddr, cbAddr);
935}
936
937
938/** Same as intnetR0IfAddrCacheLookup except we expect the address to be present already. */
939DECLINLINE(int) intnetR0IfAddrCacheLookupLikely(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
940{
941 /** @todo implement this. */
942 return intnetR0IfAddrCacheLookup(pCache, pAddr, cbAddr);
943}
944
945
946/**
947 * Worker for intnetR0IfAddrCacheLookupUnlikely that performs
948 * the lookup in the remaining cache entries after the caller
949 * has check the most likely ones.
950 *
951 * The routine is expecting not to find the address.
952 *
953 * @returns -1 if not found, the index of the cache entry if found.
954 * @param pCache The cache.
955 * @param pAddr The address.
956 * @param cbAddr The address size (optimization).
957 */
958static int intnetR0IfAddrCacheInCacheUnlikelySlow(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
959{
960 /*
961 * Perform a full table lookup.
962 */
963 unsigned i = pCache->cEntries - 2;
964 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
965 while (i >= 1)
966 {
967 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
968 return i;
969 pbEntry -= pCache->cbEntry;
970 i--;
971 }
972
973 return -1;
974}
975
976
977/**
978 * Lookup an address in a cache expecting not to find it.
979 *
980 * @returns -1 if not found, the index of the cache entry if found.
981 * @param pCache The cache.
982 * @param pAddr The address.
983 * @param cbAddr The address size (optimization).
984 */
985DECLINLINE(int) intnetR0IfAddrCacheLookupUnlikely(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
986{
987 Assert(pCache->cbAddress == cbAddr);
988
989 /*
990 * The optimized case is when there is one cache entry and
991 * it doesn't match.
992 */
993 unsigned i = pCache->cEntries;
994 if (RT_UNLIKELY( i > 0
995 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr)))
996 return 0;
997 if (RT_LIKELY(i <= 1))
998 return -1;
999
1000 /*
1001 * Then check the last entry and return if there are just two cache entries.
1002 */
1003 i--;
1004 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr)))
1005 return i;
1006 if (i <= 1)
1007 return -1;
1008
1009 return intnetR0IfAddrCacheInCacheUnlikelySlow(pCache, pAddr, cbAddr);
1010}
1011
1012
1013/**
1014 * Deletes a specific cache entry.
1015 *
1016 * Worker for intnetR0NetworkAddrCacheDelete and intnetR0NetworkAddrCacheDeleteMinusIf.
1017 *
1018 * @param pIf The interface (for logging).
1019 * @param pCache The cache.
1020 * @param iEntry The entry to delete.
1021 * @param pszMsg Log message.
1022 */
1023static void intnetR0IfAddrCacheDeleteIt(PINTNETIF pIf, PINTNETADDRCACHE pCache, int iEntry, const char *pszMsg)
1024{
1025 AssertReturnVoid(iEntry < pCache->cEntries);
1026 AssertReturnVoid(iEntry >= 0);
1027#ifdef LOG_ENABLED
1028 INTNETADDRTYPE enmAddrType = (INTNETADDRTYPE)(uintptr_t)(pCache - &pIf->aAddrCache[0]);
1029 PCRTNETADDRU pAddr = (PCRTNETADDRU)(pCache->pbEntries + iEntry * pCache->cbEntry);
1030 switch (enmAddrType)
1031 {
1032 case kIntNetAddrType_IPv4:
1033 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%#x MAC=%.6Rhxs IPv4 added #%d %d.%d.%d.%d %s\n",
1034 pIf->hIf, &pIf->MacAddr, iEntry, pAddr->au8[0], pAddr->au8[1], pAddr->au8[2], pAddr->au8[3], pszMsg));
1035 break;
1036 default:
1037 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%RX32 MAC=%.6Rhxs type=%d #%d %.*Rhxs %s\n",
1038 pIf->hIf, &pIf->MacAddr, enmAddrType, iEntry, pCache->cbAddress, pAddr, pszMsg));
1039 break;
1040 }
1041#endif
1042
1043 pCache->cEntries--;
1044 if (iEntry < pCache->cEntries)
1045 memmove(pCache->pbEntries + iEntry * pCache->cbEntry,
1046 pCache->pbEntries + (iEntry + 1) * pCache->cbEntry,
1047 (pCache->cEntries - iEntry) * pCache->cbEntry);
1048}
1049
1050
1051/**
1052 * Deletes an address from the cache, assuming it isn't actually in the cache.
1053 *
1054 * May or may not own the spinlock when calling this.
1055 *
1056 * @param pIf The interface (for logging).
1057 * @param pCache The cache.
1058 * @param pAddr The address.
1059 * @param cbAddr The address size (optimization).
1060 */
1061DECLINLINE(void) intnetR0IfAddrCacheDelete(PINTNETIF pIf, PINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr, const char *pszMsg)
1062{
1063 int i = intnetR0IfAddrCacheLookup(pCache, pAddr, cbAddr);
1064 if (RT_UNLIKELY(i >= 0))
1065 intnetR0IfAddrCacheDeleteIt(pIf, pCache, i, pszMsg);
1066}
1067
1068
1069/**
1070 * Deletes the address from all the interface caches.
1071 *
1072 * This is used to remove stale entries that has been reassigned to
1073 * other machines on the network.
1074 *
1075 * @param pNetwork The network.
1076 * @param pAddr The address.
1077 * @param enmType The address type.
1078 * @param cbAddr The address size (optimization).
1079 * @param pszMsg Log message.
1080 */
1081DECLINLINE(void) intnetR0NetworkAddrCacheDelete(PINTNETNETWORK pNetwork, PCRTNETADDRU pAddr, INTNETADDRTYPE const enmType,
1082 uint8_t const cbAddr, const char *pszMsg)
1083{
1084 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1085 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
1086
1087 uint32_t iIf = pNetwork->MacTab.cEntries;
1088 while (iIf--)
1089 {
1090 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1091 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1092 if (RT_UNLIKELY(i >= 0))
1093 intnetR0IfAddrCacheDeleteIt(pIf, &pIf->aAddrCache[enmType], i, pszMsg);
1094 }
1095
1096 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
1097}
1098
1099
1100/**
1101 * Deletes the address from all the interface caches except the specified one.
1102 *
1103 * This is used to remove stale entries that has been reassigned to
1104 * other machines on the network.
1105 *
1106 * @param pNetwork The network.
1107 * @param pAddr The address.
1108 * @param enmType The address type.
1109 * @param cbAddr The address size (optimization).
1110 * @param pszMsg Log message.
1111 */
1112DECLINLINE(void) intnetR0NetworkAddrCacheDeleteMinusIf(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, PCRTNETADDRU pAddr,
1113 INTNETADDRTYPE const enmType, uint8_t const cbAddr, const char *pszMsg)
1114{
1115 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1116 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
1117
1118 uint32_t iIf = pNetwork->MacTab.cEntries;
1119 while (iIf--)
1120 {
1121 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1122 if (pIf != pIfSender)
1123 {
1124 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1125 if (RT_UNLIKELY(i >= 0))
1126 intnetR0IfAddrCacheDeleteIt(pIf, &pIf->aAddrCache[enmType], i, pszMsg);
1127 }
1128 }
1129
1130 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
1131}
1132
1133
1134/**
1135 * Lookup an address on the network, returning the (first) interface having it
1136 * in its address cache.
1137 *
1138 * @returns Pointer to the interface on success, NULL if not found. The caller
1139 * must release the interface by calling intnetR0BusyDecIf.
1140 * @param pNetwork The network.
1141 * @param pAddr The address to lookup.
1142 * @param enmType The address type.
1143 * @param cbAddr The size of the address.
1144 */
1145DECLINLINE(PINTNETIF) intnetR0NetworkAddrCacheLookupIf(PINTNETNETWORK pNetwork, PCRTNETADDRU pAddr, INTNETADDRTYPE const enmType, uint8_t const cbAddr)
1146{
1147 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1148 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
1149
1150 uint32_t iIf = pNetwork->MacTab.cEntries;
1151 while (iIf--)
1152 {
1153 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1154 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1155 if (i >= 0)
1156 {
1157 intnetR0BusyIncIf(pIf);
1158 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
1159 return pIf;
1160 }
1161 }
1162
1163 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
1164 return NULL;
1165}
1166
1167
1168/**
1169 * Adds an address to the cache, the caller is responsible for making sure it's
1170 * not already in the cache.
1171 *
1172 * The caller must not
1173 *
1174 * @param pIf The interface (for logging).
1175 * @param pCache The address cache.
1176 * @param pAddr The address.
1177 * @param pszMsg log message.
1178 */
1179static void intnetR0IfAddrCacheAddIt(PINTNETIF pIf, PINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, const char *pszMsg)
1180{
1181 PINTNETNETWORK pNetwork = pIf->pNetwork;
1182 AssertReturnVoid(pNetwork);
1183 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1184 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
1185
1186 if (RT_UNLIKELY(!pCache->cEntriesAlloc))
1187 {
1188 /* This shouldn't happen*/
1189 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
1190 return;
1191 }
1192
1193 /* When the table is full, drop the older entry (FIFO). Do proper ageing? */
1194 if (pCache->cEntries >= pCache->cEntriesAlloc)
1195 {
1196 Log(("intnetR0IfAddrCacheAddIt: type=%d replacing %.*Rhxs\n",
1197 (int)(uintptr_t)(pCache - &pIf->aAddrCache[0]), pCache->cbAddress, pCache->pbEntries));
1198 memmove(pCache->pbEntries, pCache->pbEntries + pCache->cbEntry, pCache->cbEntry * (pCache->cEntries - 1));
1199 pCache->cEntries--;
1200 Assert(pCache->cEntries < pCache->cEntriesAlloc);
1201 }
1202
1203 /*
1204 * Add the new entry to the end of the array.
1205 */
1206 uint8_t *pbEntry = pCache->pbEntries + pCache->cEntries * pCache->cbEntry;
1207 memcpy(pbEntry, pAddr, pCache->cbAddress);
1208 memset(pbEntry + pCache->cbAddress, '\0', pCache->cbEntry - pCache->cbAddress);
1209#ifdef LOG_ENABLED
1210 INTNETADDRTYPE enmAddrType = (INTNETADDRTYPE)(uintptr_t)(pCache - &pIf->aAddrCache[0]);
1211 switch (enmAddrType)
1212 {
1213 case kIntNetAddrType_IPv4:
1214 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs IPv4 added #%d %d.%d.%d.%d %s\n",
1215 pIf->hIf, &pIf->MacAddr, pCache->cEntries, pAddr->au8[0], pAddr->au8[1], pAddr->au8[2], pAddr->au8[3], pszMsg));
1216 break;
1217 default:
1218 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs type=%d added #%d %.*Rhxs %s\n",
1219 pIf->hIf, &pIf->MacAddr, enmAddrType, pCache->cEntries, pCache->cbAddress, pAddr, pszMsg));
1220 break;
1221 }
1222#endif
1223 pCache->cEntries++;
1224 Assert(pCache->cEntries <= pCache->cEntriesAlloc);
1225
1226 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
1227}
1228
1229
1230/**
1231 * A intnetR0IfAddrCacheAdd worker that performs the rest of the lookup.
1232 *
1233 * @param pIf The interface (for logging).
1234 * @param pCache The address cache.
1235 * @param pAddr The address.
1236 * @param cbAddr The size of the address (optimization).
1237 * @param pszMsg Log message.
1238 */
1239static void intnetR0IfAddrCacheAddSlow(PINTNETIF pIf, PINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr, const char *pszMsg)
1240{
1241 /*
1242 * Check all but the first and last entries, the caller
1243 * has already checked those.
1244 */
1245 int i = pCache->cEntries - 2;
1246 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry;
1247 while (i >= 1)
1248 {
1249 if (RT_LIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1250 return;
1251 pbEntry += pCache->cbEntry;
1252 i--;
1253 }
1254
1255 /*
1256 * Not found, add it.
1257 */
1258 intnetR0IfAddrCacheAddIt(pIf, pCache, pAddr, pszMsg);
1259}
1260
1261
1262/**
1263 * Adds an address to the cache if it's not already there.
1264 *
1265 * Must not own any spinlocks when calling this function.
1266 *
1267 * @param pIf The interface (for logging).
1268 * @param pCache The address cache.
1269 * @param pAddr The address.
1270 * @param cbAddr The size of the address (optimization).
1271 * @param pszMsg Log message.
1272 */
1273DECLINLINE(void) intnetR0IfAddrCacheAdd(PINTNETIF pIf, PINTNETADDRCACHE pCache, PCRTNETADDRU pAddr,
1274 uint8_t const cbAddr, const char *pszMsg)
1275{
1276 Assert(pCache->cbAddress == cbAddr);
1277
1278 /*
1279 * The optimized case is when the address the first or last cache entry.
1280 */
1281 unsigned i = pCache->cEntries;
1282 if (RT_LIKELY( i > 0
1283 && ( intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr)
1284 || (i > 1
1285 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr))) ))
1286 return;
1287 intnetR0IfAddrCacheAddSlow(pIf, pCache, pAddr, cbAddr, pszMsg);
1288}
1289
1290
1291/**
1292 * Destroys the specified address cache.
1293 * @param pCache The address cache.
1294 */
1295static void intnetR0IfAddrCacheDestroy(PINTNETADDRCACHE pCache)
1296{
1297 void *pvFree = pCache->pbEntries;
1298 pCache->pbEntries = NULL;
1299 pCache->cEntries = 0;
1300 pCache->cEntriesAlloc = 0;
1301 RTMemFree(pvFree);
1302}
1303
1304
1305/**
1306 * Initialize the address cache for the specified address type.
1307 *
1308 * The cache storage is preallocated and fixed size so that we can handle
1309 * inserts from problematic contexts.
1310 *
1311 * @returns VINF_SUCCESS or VERR_NO_MEMORY.
1312 * @param pCache The cache to initialize.
1313 * @param enmAddrType The address type.
1314 * @param fEnabled Whether the address cache is enabled or not.
1315 */
1316static int intnetR0IfAddrCacheInit(PINTNETADDRCACHE pCache, INTNETADDRTYPE enmAddrType, bool fEnabled)
1317{
1318 pCache->cEntries = 0;
1319 pCache->cbAddress = intnetR0AddrSize(enmAddrType);
1320 pCache->cbEntry = RT_ALIGN(pCache->cbAddress, 4);
1321 if (fEnabled)
1322 {
1323 pCache->cEntriesAlloc = 32;
1324 pCache->pbEntries = (uint8_t *)RTMemAllocZ(pCache->cEntriesAlloc * pCache->cbEntry);
1325 if (!pCache->pbEntries)
1326 return VERR_NO_MEMORY;
1327 }
1328 else
1329 {
1330 pCache->cEntriesAlloc = 0;
1331 pCache->pbEntries = NULL;
1332 }
1333 return VINF_SUCCESS;
1334}
1335
1336
1337/**
1338 * Is it a multicast or broadcast MAC address?
1339 *
1340 * @returns true if multicast, false if not.
1341 * @param pMacAddr The address to inspect.
1342 */
1343DECL_FORCE_INLINE(bool) intnetR0IsMacAddrMulticast(PCRTMAC pMacAddr)
1344{
1345 return !!(pMacAddr->au8[0] & 0x01);
1346}
1347
1348
1349/**
1350 * Is it a dummy MAC address?
1351 *
1352 * We use dummy MAC addresses for interfaces which we don't know the MAC
1353 * address of because they haven't sent anything (learning) or explicitly set
1354 * it.
1355 *
1356 * @returns true if dummy, false if not.
1357 * @param pMacAddr The address to inspect.
1358 */
1359DECL_FORCE_INLINE(bool) intnetR0IsMacAddrDummy(PCRTMAC pMacAddr)
1360{
1361 /* The dummy address are broadcast addresses, don't bother check it all. */
1362 return pMacAddr->au16[0] == 0xffff;
1363}
1364
1365
1366/**
1367 * Compares two MAC addresses.
1368 *
1369 * @returns true if equal, false if not.
1370 * @param pDstAddr1 Address 1.
1371 * @param pDstAddr2 Address 2.
1372 */
1373DECL_FORCE_INLINE(bool) intnetR0AreMacAddrsEqual(PCRTMAC pDstAddr1, PCRTMAC pDstAddr2)
1374{
1375 return pDstAddr1->au16[2] == pDstAddr2->au16[2]
1376 && pDstAddr1->au16[1] == pDstAddr2->au16[1]
1377 && pDstAddr1->au16[0] == pDstAddr2->au16[0];
1378}
1379
1380
1381/**
1382 * Switch a unicast frame based on the network layer address (OSI level 3) and
1383 * return a destination table.
1384 *
1385 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1386 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1387 * @param pNetwork The network to switch on.
1388 * @param pDstMacAddr The destination MAC address.
1389 * @param enmL3AddrType The level-3 destination address type.
1390 * @param pL3Addr The level-3 destination address.
1391 * @param cbL3Addr The size of the level-3 destination address.
1392 * @param fSrc The frame source (INTNETTRUNKDIR_WIRE).
1393 * @param pDstTab The destination output table.
1394 */
1395static INTNETSWDECISION intnetR0NetworkSwitchLevel3(PINTNETNETWORK pNetwork, PCRTMAC pDstMacAddr,
1396 INTNETADDRTYPE enmL3AddrType, PCRTNETADDRU pL3Addr, uint8_t cbL3Addr,
1397 uint32_t fSrc, PINTNETDSTTAB pDstTab)
1398{
1399 Assert(fSrc == INTNETTRUNKDIR_WIRE);
1400
1401 /*
1402 * Grab the spinlock first and do the switching.
1403 */
1404 PINTNETMACTAB pTab = &pNetwork->MacTab;
1405 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1406 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
1407
1408 pDstTab->fTrunkDst = 0;
1409 pDstTab->pTrunk = 0;
1410 pDstTab->cIfs = 0;
1411
1412 /* Find exactly matching or promiscuous interfaces. */
1413 uint32_t cExactHits = 0;
1414 uint32_t iIfMac = pTab->cEntries;
1415 while (iIfMac-- > 0)
1416 {
1417 if (pTab->paEntries[iIfMac].fActive)
1418 {
1419 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1420 bool fExact = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmL3AddrType], pL3Addr, cbL3Addr) >= 0;
1421 if (fExact || pTab->paEntries[iIfMac].fPromiscuous)
1422 {
1423 cExactHits += fExact;
1424
1425 uint32_t iIfDst = pDstTab->cIfs++;
1426 pDstTab->aIfs[iIfDst].pIf = pIf;
1427 pDstTab->aIfs[iIfDst].fReplaceDstMac = fExact;
1428 intnetR0BusyIncIf(pIf);
1429 }
1430 }
1431 }
1432
1433 /* Does it match the host, or is the host promiscuous? */
1434 if (pTab->fHostActive)
1435 {
1436 bool fExact = intnetR0AreMacAddrsEqual(&pTab->HostMac, pDstMacAddr);
1437 if ( fExact
1438 || intnetR0IsMacAddrDummy(&pTab->HostMac)
1439 || pTab->fHostPromiscuous)
1440 {
1441 cExactHits += fExact;
1442 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1443 }
1444 }
1445
1446 /* Hit the wire if there are no exact matches or if it's in promiscuous mode. */
1447 if (pTab->fWireActive && (!cExactHits || pTab->fWirePromiscuous))
1448 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1449 pDstTab->fTrunkDst &= ~fSrc;
1450 if (pDstTab->fTrunkDst)
1451 {
1452 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1453 pDstTab->pTrunk = pTrunk;
1454 intnetR0BusyIncTrunk(pTrunk);
1455 }
1456
1457 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
1458 return pDstTab->cIfs
1459 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST)
1460 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK);
1461}
1462
1463
1464/**
1465 * Pre-switch a unicast MAC address.
1466 *
1467 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1468 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1469 * @param pNetwork The network to switch on.
1470 * @param fSrc The frame source.
1471 * @param pSrcAddr The source address of the frame.
1472 * @param pDstAddr The destination address of the frame.
1473 */
1474static INTNETSWDECISION intnetR0NetworkPreSwitchUnicast(PINTNETNETWORK pNetwork, uint32_t fSrc, PCRTMAC pSrcAddr,
1475 PCRTMAC pDstAddr)
1476{
1477 Assert(!intnetR0IsMacAddrMulticast(pDstAddr));
1478
1479 /*
1480 * Grab the spinlock first and do the switching.
1481 */
1482 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_BROADCAST;
1483 PINTNETMACTAB pTab = &pNetwork->MacTab;
1484 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1485 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
1486
1487 /* Iterate the internal network interfaces and look for matching source and
1488 destination addresses. */
1489 uint32_t cExactHits = 0;
1490 uint32_t iIfMac = pTab->cEntries;
1491 while (iIfMac-- > 0)
1492 {
1493 if (pTab->paEntries[iIfMac].fActive)
1494 {
1495 /* Unknown interface address? */
1496 if (intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr))
1497 break;
1498
1499 /* Paranoia - this shouldn't happen, right? */
1500 if ( pSrcAddr
1501 && intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pSrcAddr))
1502 break;
1503
1504 /* Exact match? */
1505 if (intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr))
1506 {
1507 enmSwDecision = pTab->fHostPromiscuous && fSrc == INTNETTRUNKDIR_WIRE
1508 ? INTNETSWDECISION_BROADCAST
1509 : INTNETSWDECISION_INTNET;
1510 break;
1511 }
1512 }
1513 }
1514
1515 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
1516 return enmSwDecision;
1517}
1518
1519
1520/**
1521 * Switch a unicast MAC address and return a destination table.
1522 *
1523 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1524 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1525 * @param pNetwork The network to switch on.
1526 * @param fSrc The frame source.
1527 * @param pIfSender The sender interface, NULL if trunk. Used to
1528 * prevent sending an echo to the sender.
1529 * @param pDstAddr The destination address of the frame.
1530 * @param pDstTab The destination output table.
1531 */
1532static INTNETSWDECISION intnetR0NetworkSwitchUnicast(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETIF pIfSender,
1533 PCRTMAC pDstAddr, PINTNETDSTTAB pDstTab)
1534{
1535 AssertPtr(pDstTab);
1536 Assert(!intnetR0IsMacAddrMulticast(pDstAddr));
1537
1538 /*
1539 * Grab the spinlock first and do the switching.
1540 */
1541 PINTNETMACTAB pTab = &pNetwork->MacTab;
1542 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1543 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
1544
1545 pDstTab->fTrunkDst = 0;
1546 pDstTab->pTrunk = 0;
1547 pDstTab->cIfs = 0;
1548
1549 /* Find exactly matching or promiscuous interfaces. */
1550 uint32_t cExactHits = 0;
1551 uint32_t iIfMac = pTab->cEntries;
1552 while (iIfMac-- > 0)
1553 {
1554 if (pTab->paEntries[iIfMac].fActive)
1555 {
1556 bool fExact = intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr);
1557 if ( fExact
1558 || intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr)
1559 || pTab->paEntries[iIfMac].fPromiscuous)
1560 {
1561 cExactHits += fExact;
1562
1563 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1564 if (RT_LIKELY(pIf != pIfSender)) /* paranoia */
1565 {
1566 uint32_t iIfDst = pDstTab->cIfs++;
1567 pDstTab->aIfs[iIfDst].pIf = pIf;
1568 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1569 intnetR0BusyIncIf(pIf);
1570 }
1571 }
1572 }
1573 }
1574
1575 /* Does it match the host, or is the host promiscuous? */
1576 if ( fSrc != INTNETTRUNKDIR_HOST
1577 && pTab->fHostActive)
1578 {
1579 bool fExact = intnetR0AreMacAddrsEqual(&pTab->HostMac, pDstAddr);
1580 if ( fExact
1581 || intnetR0IsMacAddrDummy(&pTab->HostMac)
1582 || pTab->fHostPromiscuous)
1583 {
1584 cExactHits += fExact;
1585 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1586 }
1587 }
1588
1589 /* Hit the wire if there are no exact matches or if it's in promiscuous mode. */
1590 if ( fSrc != INTNETTRUNKDIR_WIRE
1591 && pTab->fWireActive
1592 && (!cExactHits || pTab->fWirePromiscuous)
1593 )
1594 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1595
1596 /* Grab the trunk if we're sending to it. */
1597 if (pDstTab->fTrunkDst)
1598 {
1599 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1600 pDstTab->pTrunk = pTrunk;
1601 intnetR0BusyIncTrunk(pTrunk);
1602 }
1603
1604 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
1605 return pDstTab->cIfs
1606 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST)
1607 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK);
1608}
1609
1610
1611/**
1612 * Create a destination table for a broadcast frame.
1613 *
1614 * @returns INTNETSWDECISION_BROADCAST.
1615 * @param pNetwork The network to switch on.
1616 * @param fSrc The frame source.
1617 * @param pIfSender The sender interface, NULL if trunk. Used to
1618 * prevent sending an echo to the sender.
1619 * @param pDstTab The destination output table.
1620 */
1621static INTNETSWDECISION intnetR0NetworkSwitchBroadcast(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETIF pIfSender,
1622 PINTNETDSTTAB pDstTab)
1623{
1624 AssertPtr(pDstTab);
1625
1626 /*
1627 * Grab the spinlock first and record all active interfaces.
1628 */
1629 PINTNETMACTAB pTab = &pNetwork->MacTab;
1630 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1631 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
1632
1633 pDstTab->fTrunkDst = 0;
1634 pDstTab->pTrunk = 0;
1635 pDstTab->cIfs = 0;
1636
1637 /* Regular interfaces. */
1638 uint32_t iIfMac = pTab->cEntries;
1639 while (iIfMac-- > 0)
1640 {
1641 if (pTab->paEntries[iIfMac].fActive)
1642 {
1643 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1644 if (pIf != pIfSender)
1645 {
1646 uint32_t iIfDst = pDstTab->cIfs++;
1647 pDstTab->aIfs[iIfDst].pIf = pIf;
1648 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1649 intnetR0BusyIncIf(pIf);
1650 }
1651 }
1652 }
1653
1654 /* The trunk interface. */
1655 if (pTab->fHostActive)
1656 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1657 if (pTab->fWireActive)
1658 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1659 pDstTab->fTrunkDst &= ~fSrc;
1660 if (pDstTab->fTrunkDst)
1661 {
1662 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1663 pDstTab->pTrunk = pTrunk;
1664 intnetR0BusyIncTrunk(pTrunk);
1665 }
1666
1667 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
1668 return INTNETSWDECISION_BROADCAST;
1669}
1670
1671
1672/**
1673 * Create a destination table with the trunk and any promiscuous interfaces.
1674 *
1675 * This is only used in a fallback case of the level-3 switching, so we can
1676 * assume the wire as source and skip the sender interface filtering.
1677 *
1678 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1679 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1680 * @param pNetwork The network to switch on.
1681 * @param fSrc The frame source.
1682 * @param pDstTab The destination output table.
1683 */
1684static INTNETSWDECISION intnetR0NetworkSwitchTrunkAndPromisc(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETDSTTAB pDstTab)
1685{
1686 Assert(fSrc == INTNETTRUNKDIR_WIRE);
1687
1688 /*
1689 * Grab the spinlock first and do the switching.
1690 */
1691 PINTNETMACTAB pTab = &pNetwork->MacTab;
1692 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1693 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
1694
1695 pDstTab->fTrunkDst = 0;
1696 pDstTab->pTrunk = 0;
1697 pDstTab->cIfs = 0;
1698
1699 /* Find promiscuous interfaces. */
1700 uint32_t iIfMac = pTab->cEntries;
1701 while (iIfMac-- > 0)
1702 {
1703 if ( pTab->paEntries[iIfMac].fActive
1704 && pTab->paEntries[iIfMac].fPromiscuous)
1705 {
1706 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1707 uint32_t iIfDst = pDstTab->cIfs++;
1708 pDstTab->aIfs[iIfDst].pIf = pIf;
1709 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1710 intnetR0BusyIncIf(pIf);
1711 }
1712 }
1713
1714 /* The trunk interface. */
1715 if (pTab->fHostActive)
1716 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1717 if (pTab->fWireActive)
1718 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1719 pDstTab->fTrunkDst &= ~fSrc;
1720 if (pDstTab->fTrunkDst)
1721 {
1722 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1723 pDstTab->pTrunk = pTrunk;
1724 intnetR0BusyIncTrunk(pTrunk);
1725 }
1726
1727 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
1728 return !pDstTab->cIfs
1729 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK)
1730 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST);
1731}
1732
1733
1734/**
1735 * Create a destination table for a trunk frame.
1736 *
1737 * @returns INTNETSWDECISION_BROADCAST.
1738 * @param pNetwork The network to switch on.
1739 * @param fSrc The frame source.
1740 * @param pDstTab The destination output table.
1741 */
1742static INTNETSWDECISION intnetR0NetworkSwitchTrunk(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETDSTTAB pDstTab)
1743{
1744 AssertPtr(pDstTab);
1745
1746 /*
1747 * Grab the spinlock first and record all active interfaces.
1748 */
1749 PINTNETMACTAB pTab= &pNetwork->MacTab;
1750 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1751 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
1752
1753 pDstTab->fTrunkDst = 0;
1754 pDstTab->pTrunk = 0;
1755 pDstTab->cIfs = 0;
1756
1757 /* The trunk interface. */
1758 if (pTab->fHostActive)
1759 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1760 if (pTab->fWireActive)
1761 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1762 pDstTab->fTrunkDst &= ~fSrc;
1763 if (pDstTab->fTrunkDst)
1764 {
1765 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1766 pDstTab->pTrunk = pTrunk;
1767 intnetR0BusyIncTrunk(pTrunk);
1768 }
1769
1770 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
1771 return pDstTab->fTrunkDst ? INTNETSWDECISION_TRUNK : INTNETSWDECISION_DROP;
1772}
1773
1774
1775/**
1776 * Wrapper around RTMemAlloc for allocating a destination table.
1777 *
1778 * @returns VINF_SUCCESS or VERR_NO_MEMORY.
1779 * @param cEntries The size given as an entry count.
1780 * @param ppDstTab Where to store the pointer (always).
1781 */
1782DECLINLINE(int) intnetR0AllocDstTab(uint32_t cEntries, PINTNETDSTTAB *ppDstTab)
1783{
1784 PINTNETDSTTAB pDstTab;
1785 *ppDstTab = pDstTab = (PINTNETDSTTAB)RTMemAlloc(RT_OFFSETOF(INTNETDSTTAB, aIfs[cEntries]));
1786 if (RT_UNLIKELY(!pDstTab))
1787 return VERR_NO_MEMORY;
1788 return VINF_SUCCESS;
1789}
1790
1791
1792/**
1793 * Ensures that there is space for another interface in the MAC address lookup
1794 * table as well as all the destination tables.
1795 *
1796 * The caller must own the create/open/destroy mutex.
1797 *
1798 * @returns VINF_SUCCESS, VERR_NO_MEMORY or VERR_OUT_OF_RANGE.
1799 * @param pNetwork The network to operate on.
1800 */
1801static int intnetR0NetworkEnsureTabSpace(PINTNETNETWORK pNetwork)
1802{
1803 /*
1804 * The cEntries and cEntriesAllocated members are only updated while
1805 * owning the big mutex, so we only need the spinlock when doing the
1806 * actual table replacing.
1807 */
1808 PINTNETMACTAB pTab = &pNetwork->MacTab;
1809 int rc = VINF_SUCCESS;
1810 AssertReturn(pTab->cEntries <= pTab->cEntriesAllocated, VERR_INTERNAL_ERROR_2);
1811 if (pTab->cEntries + 1 > pTab->cEntriesAllocated)
1812 {
1813 uint32_t const cAllocated = pTab->cEntriesAllocated + INTNET_GROW_DSTTAB_SIZE;
1814 if (cAllocated <= INTNET_MAX_IFS)
1815 {
1816 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1817
1818 /*
1819 * Resize the destination tables first, this can be kind of tedious.
1820 */
1821 for (uint32_t i = 0; i < pTab->cEntries; i++)
1822 {
1823 PINTNETIF pIf = pTab->paEntries[i].pIf; AssertPtr(pIf);
1824 PINTNETDSTTAB pNew;
1825 rc = intnetR0AllocDstTab(cAllocated, &pNew);
1826 if (RT_FAILURE(rc))
1827 break;
1828
1829 for (;;)
1830 {
1831 PINTNETDSTTAB pOld = pIf->pDstTab;
1832 if ( pOld
1833 && ASMAtomicCmpXchgPtr(&pIf->pDstTab, pNew, pOld))
1834 {
1835 RTMemFree(pOld);
1836 break;
1837 }
1838 intnetR0BusyWait(pNetwork, &pIf->cBusy);
1839 }
1840 }
1841
1842 /*
1843 * The trunk.
1844 */
1845 if ( RT_SUCCESS(rc)
1846 && pNetwork->MacTab.pTrunk)
1847 {
1848 AssertCompileAdjacentMembers(INTNETTRUNKIF, apTaskDstTabs, apIntDstTabs);
1849 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
1850 PINTNETDSTTAB * const ppEndDstTab = &pTrunk->apIntDstTabs[pTrunk->cIntDstTabs];
1851 for (PINTNETDSTTAB *ppDstTab = &pTrunk->apTaskDstTabs[0];
1852 ppDstTab != ppEndDstTab && RT_SUCCESS(rc);
1853 ppDstTab++)
1854 {
1855 PINTNETDSTTAB pNew;
1856 rc = intnetR0AllocDstTab(cAllocated, &pNew);
1857 if (RT_FAILURE(rc))
1858 break;
1859
1860 for (;;)
1861 {
1862 RTSpinlockAcquireNoInts(pTrunk->hDstTabSpinlock, &Tmp);
1863 void *pvOld = *ppDstTab;
1864 if (pvOld)
1865 *ppDstTab = pNew;
1866 RTSpinlockReleaseNoInts(pTrunk->hDstTabSpinlock, &Tmp);
1867 if (pvOld)
1868 {
1869 RTMemFree(pvOld);
1870 break;
1871 }
1872 intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
1873 }
1874 }
1875 }
1876
1877 /*
1878 * The MAC Address table itself.
1879 */
1880 if (RT_SUCCESS(rc))
1881 {
1882 PINTNETMACTABENTRY paNew = (PINTNETMACTABENTRY)RTMemAlloc(sizeof(INTNETMACTABENTRY) * cAllocated);
1883 if (paNew)
1884 {
1885 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
1886
1887 PINTNETMACTABENTRY paOld = pTab->paEntries;
1888 uint32_t i = pTab->cEntries;
1889 while (i-- > 0)
1890 {
1891 paNew[i] = paOld[i];
1892
1893 paOld[i].fActive = false;
1894 paOld[i].pIf = NULL;
1895 }
1896
1897 pTab->paEntries = paNew;
1898 pTab->cEntriesAllocated = cAllocated;
1899
1900 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
1901
1902 RTMemFree(paOld);
1903 }
1904 else
1905 rc = VERR_NO_MEMORY;
1906 }
1907 }
1908 else
1909 rc = VERR_OUT_OF_RANGE;
1910 }
1911 return rc;
1912}
1913
1914
1915
1916
1917#ifdef INTNET_WITH_DHCP_SNOOPING
1918
1919/**
1920 * Snoops IP assignments and releases from the DHCPv4 traffic.
1921 *
1922 * The caller is responsible for making sure this traffic between the
1923 * BOOTPS and BOOTPC ports and validate the IP header. The UDP packet
1924 * need not be validated beyond the ports.
1925 *
1926 * @param pNetwork The network this frame was seen on.
1927 * @param pIpHdr Pointer to a valid IP header. This is for pseudo
1928 * header validation, so only the minimum header size
1929 * needs to be available and valid here.
1930 * @param pUdpHdr Pointer to the UDP header in the frame.
1931 * @param cbUdpPkt What's left of the frame when starting at the UDP header.
1932 * @param fGso Set if this is a GSO frame, clear if regular.
1933 */
1934static void intnetR0NetworkSnoopDhcp(PINTNETNETWORK pNetwork, PCRTNETIPV4 pIpHdr, PCRTNETUDP pUdpHdr, uint32_t cbUdpPkt)
1935{
1936 /*
1937 * Check if the DHCP message is valid and get the type.
1938 */
1939 if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbUdpPkt, true /*fCheckSum*/))
1940 {
1941 Log6(("Bad UDP packet\n"));
1942 return;
1943 }
1944 PCRTNETBOOTP pDhcp = (PCRTNETBOOTP)(pUdpHdr + 1);
1945 uint8_t MsgType;
1946 if (!RTNetIPv4IsDHCPValid(pUdpHdr, pDhcp, cbUdpPkt - sizeof(*pUdpHdr), &MsgType))
1947 {
1948 Log6(("Bad DHCP packet\n"));
1949 return;
1950 }
1951
1952#ifdef LOG_ENABLED
1953 /*
1954 * Log it.
1955 */
1956 const char *pszType = "unknown";
1957 switch (MsgType)
1958 {
1959 case RTNET_DHCP_MT_DISCOVER: pszType = "discover"; break;
1960 case RTNET_DHCP_MT_OFFER: pszType = "offer"; break;
1961 case RTNET_DHCP_MT_REQUEST: pszType = "request"; break;
1962 case RTNET_DHCP_MT_DECLINE: pszType = "decline"; break;
1963 case RTNET_DHCP_MT_ACK: pszType = "ack"; break;
1964 case RTNET_DHCP_MT_NAC: pszType = "nac"; break;
1965 case RTNET_DHCP_MT_RELEASE: pszType = "release"; break;
1966 case RTNET_DHCP_MT_INFORM: pszType = "inform"; break;
1967 }
1968 Log6(("DHCP msg: %d (%s) client %.6Rhxs ciaddr=%d.%d.%d.%d yiaddr=%d.%d.%d.%d\n", MsgType, pszType, &pDhcp->bp_chaddr,
1969 pDhcp->bp_ciaddr.au8[0], pDhcp->bp_ciaddr.au8[1], pDhcp->bp_ciaddr.au8[2], pDhcp->bp_ciaddr.au8[3],
1970 pDhcp->bp_yiaddr.au8[0], pDhcp->bp_yiaddr.au8[1], pDhcp->bp_yiaddr.au8[2], pDhcp->bp_yiaddr.au8[3]));
1971#endif /* LOG_EANBLED */
1972
1973 /*
1974 * Act upon the message.
1975 */
1976 switch (MsgType)
1977 {
1978#if 0
1979 case RTNET_DHCP_MT_REQUEST:
1980 /** @todo Check for valid non-broadcast requests w/ IP for any of the MACs we
1981 * know, and add the IP to the cache. */
1982 break;
1983#endif
1984
1985
1986 /*
1987 * Lookup the interface by its MAC address and insert the IPv4 address into the cache.
1988 * Delete the old client address first, just in case it changed in a renewal.
1989 */
1990 case RTNET_DHCP_MT_ACK:
1991 if (intnetR0IPv4AddrIsGood(pDhcp->bp_yiaddr))
1992 {
1993 PINTNETIF pMatchingIf = NULL;
1994 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1995 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
1996
1997 uint32_t iIf = pNetwork->MacTab.cEntries;
1998 while (iIf-- > 0)
1999 {
2000 PINTNETIF pCur = pNetwork->MacTab.paEntries[iIf].pIf;
2001 if ( intnetR0IfHasMacAddr(pCur)
2002 && !memcmp(&pCur->MacAddr, &pDhcp->bp_chaddr, sizeof(RTMAC)))
2003 {
2004 intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
2005 (PCRTNETADDRU)&pDhcp->bp_ciaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_ACK");
2006 if (!pMatchingIf)
2007 {
2008 pMatchingIf = pCur;
2009 intnetR0BusyIncIf(pMatchingIf);
2010 }
2011 }
2012 }
2013
2014 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
2015
2016 if (pMatchingIf)
2017 {
2018 intnetR0IfAddrCacheAdd(pMatchingIf, &pMatchingIf->aAddrCache[kIntNetAddrType_IPv4],
2019 (PCRTNETADDRU)&pDhcp->bp_yiaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_ACK");
2020 intnetR0BusyDecIf(pMatchingIf);
2021 }
2022 }
2023 return;
2024
2025
2026 /*
2027 * Lookup the interface by its MAC address and remove the IPv4 address(es) from the cache.
2028 */
2029 case RTNET_DHCP_MT_RELEASE:
2030 {
2031 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
2032 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
2033
2034 uint32_t iIf = pNetwork->MacTab.cEntries;
2035 while (iIf-- > 0)
2036 {
2037 PINTNETIF pCur = pNetwork->MacTab.paEntries[iIf].pIf;
2038 if ( intnetR0IfHasMacAddr(pCur)
2039 && !memcmp(&pCur->MacAddr, &pDhcp->bp_chaddr, sizeof(RTMAC)))
2040 {
2041 intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
2042 (PCRTNETADDRU)&pDhcp->bp_ciaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_RELEASE");
2043 intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
2044 (PCRTNETADDRU)&pDhcp->bp_yiaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_RELEASE");
2045 }
2046 }
2047
2048 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
2049 break;
2050 }
2051 }
2052
2053}
2054
2055
2056/**
2057 * Worker for intnetR0TrunkIfSnoopAddr that takes care of what
2058 * is likely to be a DHCP message.
2059 *
2060 * The caller has already check that the UDP source and destination ports
2061 * are BOOTPS or BOOTPC.
2062 *
2063 * @param pNetwork The network this frame was seen on.
2064 * @param pSG The gather list for the frame.
2065 */
2066static void intnetR0TrunkIfSnoopDhcp(PINTNETNETWORK pNetwork, PCINTNETSG pSG)
2067{
2068 /*
2069 * Get a pointer to a linear copy of the full packet, using the
2070 * temporary buffer if necessary.
2071 */
2072 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((PCRTNETETHERHDR)pSG->aSegs[0].pv + 1);
2073 uint32_t cbPacket = pSG->cbTotal - sizeof(RTNETETHERHDR);
2074 if (pSG->cSegsUsed > 1)
2075 {
2076 cbPacket = RT_MIN(cbPacket, INTNETNETWORK_TMP_SIZE);
2077 Log6(("intnetR0TrunkIfSnoopDhcp: Copying IPv4/UDP/DHCP pkt %u\n", cbPacket));
2078 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
2079 return;
2080 //pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
2081 pIpHdr = (PCRTNETIPV4)pNetwork->pbTmp;
2082 }
2083
2084 /*
2085 * Validate the IP header and find the UDP packet.
2086 */
2087 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, pSG->cbTotal - sizeof(RTNETETHERHDR), true /*fChecksum*/))
2088 {
2089 Log(("intnetR0TrunkIfSnoopDhcp: bad ip header\n"));
2090 return;
2091 }
2092 uint32_t cbIpHdr = pIpHdr->ip_hl * 4;
2093
2094 /*
2095 * Hand it over to the common DHCP snooper.
2096 */
2097 intnetR0NetworkSnoopDhcp(pNetwork, pIpHdr, (PCRTNETUDP)((uintptr_t)pIpHdr + cbIpHdr), cbPacket - cbIpHdr);
2098}
2099
2100#endif /* INTNET_WITH_DHCP_SNOOPING */
2101
2102
2103/**
2104 * Snoops up source addresses from ARP requests and purge these from the address
2105 * caches.
2106 *
2107 * The purpose of this purging is to get rid of stale addresses.
2108 *
2109 * @param pNetwork The network this frame was seen on.
2110 * @param pSG The gather list for the frame.
2111 */
2112static void intnetR0TrunkIfSnoopArp(PINTNETNETWORK pNetwork, PCINTNETSG pSG)
2113{
2114 /*
2115 * Check the minimum size first.
2116 */
2117 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4)))
2118 return;
2119
2120 /*
2121 * Copy to temporary buffer if necessary.
2122 */
2123 uint32_t cbPacket = RT_MIN(pSG->cbTotal, sizeof(RTNETARPIPV4));
2124 PCRTNETARPIPV4 pArpIPv4 = (PCRTNETARPIPV4)((uintptr_t)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
2125 if ( pSG->cSegsUsed != 1
2126 && pSG->aSegs[0].cb < cbPacket)
2127 {
2128 if ( (pSG->fFlags & (INTNETSG_FLAGS_ARP_IPV4 | INTNETSG_FLAGS_PKT_CP_IN_TMP))
2129 != (INTNETSG_FLAGS_ARP_IPV4 | INTNETSG_FLAGS_PKT_CP_IN_TMP)
2130 && !intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
2131 return;
2132 pArpIPv4 = (PCRTNETARPIPV4)pNetwork->pbTmp;
2133 }
2134
2135 /*
2136 * Ignore packets which doesn't interest us or we perceive as malformed.
2137 */
2138 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
2139 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
2140 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
2141 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
2142 return;
2143 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
2144 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
2145 && ar_oper != RTNET_ARPOP_REPLY))
2146 {
2147 Log6(("ts-ar: op=%#x\n", ar_oper));
2148 return;
2149 }
2150
2151 /*
2152 * Delete the source address if it's OK.
2153 */
2154 if ( !intnetR0IsMacAddrMulticast(&pArpIPv4->ar_sha)
2155 && ( pArpIPv4->ar_sha.au16[0]
2156 || pArpIPv4->ar_sha.au16[1]
2157 || pArpIPv4->ar_sha.au16[2])
2158 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_spa))
2159 {
2160 Log6(("ts-ar: %d.%d.%d.%d / %.6Rhxs\n", pArpIPv4->ar_spa.au8[0], pArpIPv4->ar_spa.au8[1],
2161 pArpIPv4->ar_spa.au8[2], pArpIPv4->ar_spa.au8[3], &pArpIPv4->ar_sha));
2162 intnetR0NetworkAddrCacheDelete(pNetwork, (PCRTNETADDRU)&pArpIPv4->ar_spa,
2163 kIntNetAddrType_IPv4, sizeof(pArpIPv4->ar_spa), "tif/arp");
2164 }
2165}
2166
2167
2168#ifdef INTNET_WITH_DHCP_SNOOPING
2169/**
2170 * Snoop up addresses from ARP and DHCP traffic from frames comming
2171 * over the trunk connection.
2172 *
2173 * The caller is responsible for do some basic filtering before calling
2174 * this function.
2175 * For IPv4 this means checking against the minimum DHCPv4 frame size.
2176 *
2177 * @param pNetwork The network.
2178 * @param pSG The SG list for the frame.
2179 * @param EtherType The Ethertype of the frame.
2180 */
2181static void intnetR0TrunkIfSnoopAddr(PINTNETNETWORK pNetwork, PCINTNETSG pSG, uint16_t EtherType)
2182{
2183 switch (EtherType)
2184 {
2185 case RTNET_ETHERTYPE_IPV4:
2186 {
2187 uint32_t cbIpHdr;
2188 uint8_t b;
2189
2190 Assert(pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN);
2191 if (pSG->aSegs[0].cb >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN)
2192 {
2193 /* check if the protocol is UDP */
2194 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((uint8_t const *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
2195 if (pIpHdr->ip_p != RTNETIPV4_PROT_UDP)
2196 return;
2197
2198 /* get the TCP header length */
2199 cbIpHdr = pIpHdr->ip_hl * 4;
2200 }
2201 else
2202 {
2203 /* check if the protocol is UDP */
2204 if ( intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_p))
2205 != RTNETIPV4_PROT_UDP)
2206 return;
2207
2208 /* get the TCP header length */
2209 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + 0); /* (IPv4 first byte, a bitfield) */
2210 cbIpHdr = (b & 0x0f) * 4;
2211 }
2212 if (cbIpHdr < RTNETIPV4_MIN_LEN)
2213 return;
2214
2215 /* compare the ports. */
2216 if (pSG->aSegs[0].cb >= sizeof(RTNETETHERHDR) + cbIpHdr + RTNETUDP_MIN_LEN)
2217 {
2218 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t const *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR) + cbIpHdr);
2219 if ( ( RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPS
2220 && RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPS)
2221 || ( RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPC
2222 && RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPC))
2223 return;
2224 }
2225 else
2226 {
2227 /* get the lower byte of the UDP source port number. */
2228 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_sport) + 1);
2229 if ( b != RTNETIPV4_PORT_BOOTPS
2230 && b != RTNETIPV4_PORT_BOOTPC)
2231 return;
2232 uint8_t SrcPort = b;
2233 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_sport));
2234 if (b)
2235 return;
2236
2237 /* get the lower byte of the UDP destination port number. */
2238 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_dport) + 1);
2239 if ( b != RTNETIPV4_PORT_BOOTPS
2240 && b != RTNETIPV4_PORT_BOOTPC)
2241 return;
2242 if (b == SrcPort)
2243 return;
2244 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_dport));
2245 if (b)
2246 return;
2247 }
2248 intnetR0TrunkIfSnoopDhcp(pNetwork, pSG);
2249 break;
2250 }
2251
2252 case RTNET_ETHERTYPE_IPV6:
2253 {
2254 /** @todo IPv6: Check for ICMPv6. It looks like type 133 (Router solicitation) might
2255 * need to be edited. Check out how NDP works... */
2256 break;
2257 }
2258
2259 case RTNET_ETHERTYPE_ARP:
2260 intnetR0TrunkIfSnoopArp(pNetwork, pSG);
2261 break;
2262 }
2263}
2264#endif /* INTNET_WITH_DHCP_SNOOPING */
2265
2266
2267/**
2268 * Deals with an IPv4 packet.
2269 *
2270 * This will fish out the source IP address and add it to the cache.
2271 * Then it will look for DHCPRELEASE requests (?) and anything else
2272 * that we migh find useful later.
2273 *
2274 * @param pIf The interface that's sending the frame.
2275 * @param pIpHdr Pointer to the IPv4 header in the frame.
2276 * @param cbPacket The size of the packet, or more correctly the
2277 * size of the frame without the ethernet header.
2278 * @param fGso Set if this is a GSO frame, clear if regular.
2279 */
2280static void intnetR0IfSnoopIPv4SourceAddr(PINTNETIF pIf, PCRTNETIPV4 pIpHdr, uint32_t cbPacket, bool fGso)
2281{
2282 /*
2283 * Check the header size first to prevent access invalid data.
2284 */
2285 if (cbPacket < RTNETIPV4_MIN_LEN)
2286 return;
2287 uint32_t cbHdr = (uint32_t)pIpHdr->ip_hl * 4;
2288 if ( cbHdr < RTNETIPV4_MIN_LEN
2289 || cbPacket < cbHdr)
2290 return;
2291
2292 /*
2293 * If the source address is good (not broadcast or my network) and
2294 * not already in the address cache of the sender, add it. Validate
2295 * the IP header before adding it.
2296 */
2297 bool fValidatedIpHdr = false;
2298 RTNETADDRU Addr;
2299 Addr.IPv4 = pIpHdr->ip_src;
2300 if ( intnetR0IPv4AddrIsGood(Addr.IPv4)
2301 && intnetR0IfAddrCacheLookupLikely(&pIf->aAddrCache[kIntNetAddrType_IPv4], &Addr, sizeof(Addr.IPv4)) < 0)
2302 {
2303 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, cbPacket, !fGso /*fChecksum*/))
2304 {
2305 Log(("intnetR0IfSnoopIPv4SourceAddr: bad ip header\n"));
2306 return;
2307 }
2308 intnetR0IfAddrCacheAddIt(pIf, &pIf->aAddrCache[kIntNetAddrType_IPv4], &Addr, "if/ipv4");
2309 fValidatedIpHdr = true;
2310 }
2311
2312#ifdef INTNET_WITH_DHCP_SNOOPING
2313 /*
2314 * Check for potential DHCP packets.
2315 */
2316 if ( pIpHdr->ip_p == RTNETIPV4_PROT_UDP /* DHCP is UDP. */
2317 && cbPacket >= cbHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN /* Min DHCP packet len. */
2318 && !fGso) /* GSO is not applicable to DHCP traffic. */
2319 {
2320 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t const *)pIpHdr + cbHdr);
2321 if ( ( RT_BE2H_U16(pUdpHdr->uh_dport) == RTNETIPV4_PORT_BOOTPS
2322 || RT_BE2H_U16(pUdpHdr->uh_sport) == RTNETIPV4_PORT_BOOTPS)
2323 && ( RT_BE2H_U16(pUdpHdr->uh_sport) == RTNETIPV4_PORT_BOOTPC
2324 || RT_BE2H_U16(pUdpHdr->uh_dport) == RTNETIPV4_PORT_BOOTPC))
2325 {
2326 if ( fValidatedIpHdr
2327 || RTNetIPv4IsHdrValid(pIpHdr, cbPacket, cbPacket, !fGso /*fChecksum*/))
2328 intnetR0NetworkSnoopDhcp(pIf->pNetwork, pIpHdr, pUdpHdr, cbPacket - cbHdr);
2329 else
2330 Log(("intnetR0IfSnoopIPv4SourceAddr: bad ip header (dhcp)\n"));
2331 }
2332 }
2333#endif /* INTNET_WITH_DHCP_SNOOPING */
2334}
2335
2336
2337/**
2338 * Snoop up source addresses from an ARP request or reply.
2339 *
2340 * @param pIf The interface that's sending the frame.
2341 * @param pHdr The ARP header.
2342 * @param cbPacket The size of the packet (migth be larger than the ARP
2343 * request 'cause of min ethernet frame size).
2344 * @param pfSgFlags Pointer to the SG flags. This is used to tag the packet so we
2345 * don't have to repeat the frame parsing in intnetR0TrunkIfSend.
2346 */
2347static void intnetR0IfSnoopArpAddr(PINTNETIF pIf, PCRTNETARPIPV4 pArpIPv4, uint32_t cbPacket, uint16_t *pfSgFlags)
2348{
2349 /*
2350 * Ignore packets which doesn't interest us or we perceive as malformed.
2351 */
2352 if (RT_UNLIKELY(cbPacket < sizeof(RTNETARPIPV4)))
2353 return;
2354 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
2355 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
2356 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
2357 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
2358 return;
2359 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
2360 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
2361 && ar_oper != RTNET_ARPOP_REPLY))
2362 {
2363 Log6(("ar_oper=%#x\n", ar_oper));
2364 return;
2365 }
2366
2367 /*
2368 * Tag the SG as ARP IPv4 for later editing, then check for addresses
2369 * which can be removed or added to the address cache of the sender.
2370 */
2371 *pfSgFlags |= INTNETSG_FLAGS_ARP_IPV4;
2372
2373 if ( ar_oper == RTNET_ARPOP_REPLY
2374 && !intnetR0IsMacAddrMulticast(&pArpIPv4->ar_tha)
2375 && ( pArpIPv4->ar_tha.au16[0]
2376 || pArpIPv4->ar_tha.au16[1]
2377 || pArpIPv4->ar_tha.au16[2])
2378 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_tpa))
2379 intnetR0IfAddrCacheDelete(pIf, &pIf->aAddrCache[kIntNetAddrType_IPv4],
2380 (PCRTNETADDRU)&pArpIPv4->ar_tpa, sizeof(RTNETADDRIPV4), "if/arp");
2381
2382 if ( !memcmp(&pArpIPv4->ar_sha, &pIf->MacAddr, sizeof(RTMAC))
2383 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_spa))
2384 intnetR0IfAddrCacheAdd(pIf, &pIf->aAddrCache[kIntNetAddrType_IPv4],
2385 (PCRTNETADDRU)&pArpIPv4->ar_spa, sizeof(RTNETADDRIPV4), "if/arp");
2386}
2387
2388
2389
2390/**
2391 * Checks packets send by a normal interface for new network
2392 * layer addresses.
2393 *
2394 * @param pIf The interface that's sending the frame.
2395 * @param pbFrame The frame.
2396 * @param cbFrame The size of the frame.
2397 * @param fGso Set if this is a GSO frame, clear if regular.
2398 * @param pfSgFlags Pointer to the SG flags. This is used to tag the packet so we
2399 * don't have to repeat the frame parsing in intnetR0TrunkIfSend.
2400 */
2401static void intnetR0IfSnoopAddr(PINTNETIF pIf, uint8_t const *pbFrame, uint32_t cbFrame, bool fGso, uint16_t *pfSgFlags)
2402{
2403 /*
2404 * Fish out the ethertype and look for stuff we can handle.
2405 */
2406 if (cbFrame <= sizeof(RTNETETHERHDR))
2407 return;
2408 cbFrame -= sizeof(RTNETETHERHDR);
2409
2410 uint16_t EtherType = RT_H2BE_U16(((PCRTNETETHERHDR)pbFrame)->EtherType);
2411 switch (EtherType)
2412 {
2413 case RTNET_ETHERTYPE_IPV4:
2414 intnetR0IfSnoopIPv4SourceAddr(pIf, (PCRTNETIPV4)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, fGso);
2415 break;
2416#if 0 /** @todo IntNet: implement IPv6 for wireless MAC sharing. */
2417 case RTNET_ETHERTYPE_IPV6:
2418 /** @todo IPv6: Check for ICMPv6. It looks like type 133 (Router solicitation) might
2419 * need to be edited. Check out how NDP works... */
2420 intnetR0IfSnoopIPv6SourceAddr(pIf, (PCINTNETIPV6)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, fGso, pfSgFlags);
2421 break;
2422#endif
2423#if 0 /** @todo IntNet: implement IPX for wireless MAC sharing? */
2424 case RTNET_ETHERTYPE_IPX_1:
2425 case RTNET_ETHERTYPE_IPX_2:
2426 case RTNET_ETHERTYPE_IPX_3:
2427 intnetR0IfSnoopIpxSourceAddr(pIf, (PCINTNETIPX)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, pfSgFlags);
2428 break;
2429#endif
2430 case RTNET_ETHERTYPE_ARP:
2431 intnetR0IfSnoopArpAddr(pIf, (PCRTNETARPIPV4)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, pfSgFlags);
2432 break;
2433 }
2434}
2435
2436
2437/**
2438 * Writes a frame packet to the ring buffer.
2439 *
2440 * @returns VBox status code.
2441 * @param pBuf The buffer.
2442 * @param pRingBuf The ring buffer to read from.
2443 * @param pSG The gather list.
2444 * @param pNewDstMac Set the destination MAC address to the address if specified.
2445 */
2446static int intnetR0RingWriteFrame(PINTNETRINGBUF pRingBuf, PCINTNETSG pSG, PCRTMAC pNewDstMac)
2447{
2448 PINTNETHDR pHdr = NULL; /* shut up gcc*/
2449 void *pvDst = NULL; /* ditto */
2450 int rc;
2451 if (pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
2452 rc = IntNetRingAllocateFrame(pRingBuf, pSG->cbTotal, &pHdr, &pvDst);
2453 else
2454 rc = IntNetRingAllocateGsoFrame(pRingBuf, pSG->cbTotal, &pSG->GsoCtx, &pHdr, &pvDst);
2455 if (RT_SUCCESS(rc))
2456 {
2457 IntNetSgRead(pSG, pvDst);
2458 if (pNewDstMac)
2459 ((PRTNETETHERHDR)pvDst)->DstMac = *pNewDstMac;
2460
2461 IntNetRingCommitFrame(pRingBuf, pHdr);
2462 return VINF_SUCCESS;
2463 }
2464 return rc;
2465}
2466
2467
2468/**
2469 * Sends a frame to a specific interface.
2470 *
2471 * @param pIf The interface.
2472 * @param pIfSender The interface sending the frame. This is NULL if it's the trunk.
2473 * @param pSG The gather buffer which data is being sent to the interface.
2474 * @param pNewDstMac Set the destination MAC address to the address if specified.
2475 */
2476static void intnetR0IfSend(PINTNETIF pIf, PINTNETIF pIfSender, PINTNETSG pSG, PCRTMAC pNewDstMac)
2477{
2478 /*
2479 * Grab the receive/producer lock and copy over the frame.
2480 */
2481 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
2482 RTSpinlockAcquireNoInts(pIf->hRecvInSpinlock, &Tmp);
2483 int rc = intnetR0RingWriteFrame(&pIf->pIntBuf->Recv, pSG, pNewDstMac);
2484 RTSpinlockReleaseNoInts(pIf->hRecvInSpinlock, &Tmp);
2485 if (RT_SUCCESS(rc))
2486 {
2487 pIf->cYields = 0;
2488 RTSemEventSignal(pIf->hRecvEvent);
2489 return;
2490 }
2491
2492 Log(("intnetR0IfSend: overflow cb=%d hIf=%RX32\n", pSG->cbTotal, pIf->hIf));
2493
2494 /*
2495 * Scheduling hack, for unicore machines primarily.
2496 */
2497 if ( pIf->fActive
2498 && pIf->cYields < 4 /* just twice */
2499 && pIfSender /* but not if it's from the trunk */
2500 && RTThreadPreemptIsEnabled(NIL_RTTHREAD)
2501 )
2502 {
2503 unsigned cYields = 2;
2504 while (--cYields > 0)
2505 {
2506 RTSemEventSignal(pIf->hRecvEvent);
2507 RTThreadYield();
2508
2509 RTSpinlockAcquireNoInts(pIf->hRecvInSpinlock, &Tmp);
2510 rc = intnetR0RingWriteFrame(&pIf->pIntBuf->Recv, pSG, pNewDstMac);
2511 RTSpinlockReleaseNoInts(pIf->hRecvInSpinlock, &Tmp);
2512 if (RT_SUCCESS(rc))
2513 {
2514 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatYieldsOk);
2515 RTSemEventSignal(pIf->hRecvEvent);
2516 return;
2517 }
2518 pIf->cYields++;
2519 }
2520 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatYieldsNok);
2521 }
2522
2523 /* ok, the frame is lost. */
2524 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatLost);
2525 RTSemEventSignal(pIf->hRecvEvent);
2526}
2527
2528
2529/**
2530 * Fallback path that does the GSO segmenting before passing the frame on to the
2531 * trunk interface.
2532 *
2533 * The caller holds the trunk lock.
2534 *
2535 * @param pThis The trunk.
2536 * @param pIfSender The IF sending the frame.
2537 * @param pSG Pointer to the gather list.
2538 * @param fDst The destination flags.
2539 */
2540static int intnetR0TrunkIfSendGsoFallback(PINTNETTRUNKIF pThis, PINTNETIF pIfSender, PINTNETSG pSG, uint32_t fDst)
2541{
2542 /*
2543 * Since we're only using this for GSO frame comming from the internal
2544 * network interfaces and never the trunk, we can assume there is only
2545 * one segment. This simplifies the code quite a bit.
2546 */
2547 Assert(PDMNetGsoIsValid(&pSG->GsoCtx, sizeof(pSG->GsoCtx), pSG->cbTotal));
2548 AssertReturn(pSG->cSegsUsed == 1, VERR_INTERNAL_ERROR_4);
2549
2550 union
2551 {
2552 uint8_t abBuf[sizeof(INTNETSG) + sizeof(INTNETSEG)];
2553 INTNETSG SG;
2554 } u;
2555
2556 /*
2557 * Carve out the frame segments with the header and frame in different
2558 * scatter / gather segments.
2559 */
2560 uint32_t const cSegs = PDMNetGsoCalcSegmentCount(&pSG->GsoCtx, pSG->cbTotal);
2561 for (uint32_t iSeg = 0; iSeg < cSegs; iSeg++)
2562 {
2563 uint32_t cbSegPayload;
2564 uint32_t offSegPayload = PDMNetGsoCarveSegment(&pSG->GsoCtx, (uint8_t *)pSG->aSegs[0].pv, pSG->cbTotal, iSeg, cSegs,
2565 pThis->abGsoHdrs, &cbSegPayload);
2566
2567 IntNetSgInitTempSegs(&u.SG, pSG->GsoCtx.cbHdrs + cbSegPayload, 2, 2);
2568 u.SG.aSegs[0].Phys = NIL_RTHCPHYS;
2569 u.SG.aSegs[0].pv = pThis->abGsoHdrs;
2570 u.SG.aSegs[0].cb = pSG->GsoCtx.cbHdrs;
2571 u.SG.aSegs[1].Phys = NIL_RTHCPHYS;
2572 u.SG.aSegs[1].pv = (uint8_t *)pSG->aSegs[0].pv + offSegPayload;
2573 u.SG.aSegs[1].cb = (uint32_t)cbSegPayload;
2574
2575 int rc = pThis->pIfPort->pfnXmit(pThis->pIfPort, pIfSender->pvIfData, &u.SG, fDst);
2576 if (RT_FAILURE(rc))
2577 return rc;
2578 }
2579 return VINF_SUCCESS;
2580}
2581
2582
2583/**
2584 * Checks if any of the given trunk destinations can handle this kind of GSO SG.
2585 *
2586 * @returns true if it can, false if it cannot.
2587 * @param pThis The trunk.
2588 * @param pSG The scatter / gather buffer.
2589 * @param fDst The destination mask.
2590 */
2591DECLINLINE(bool) intnetR0TrunkIfCanHandleGsoFrame(PINTNETTRUNKIF pThis, PINTNETSG pSG, uint32_t fDst)
2592{
2593 uint8_t u8Type = pSG->GsoCtx.u8Type;
2594 AssertReturn(u8Type < 32, false); /* paranoia */
2595 uint32_t fMask = RT_BIT_32(u8Type);
2596
2597 if (fDst == INTNETTRUNKDIR_HOST)
2598 return !!(pThis->fHostGsoCapabilites & fMask);
2599 if (fDst == INTNETTRUNKDIR_WIRE)
2600 return !!(pThis->fWireGsoCapabilites & fMask);
2601 Assert(fDst == (INTNETTRUNKDIR_WIRE | INTNETTRUNKDIR_HOST));
2602 return !!(pThis->fHostGsoCapabilites & pThis->fWireGsoCapabilites & fMask);
2603}
2604
2605
2606/**
2607 * Sends a frame down the trunk.
2608 *
2609 * @param pThis The trunk.
2610 * @param pNetwork The network the frame is being sent to.
2611 * @param pIfSender The IF sending the frame. Used for MAC address
2612 * checks in shared MAC mode.
2613 * @param fDst The destination flags.
2614 * @param pSG Pointer to the gather list.
2615 */
2616static void intnetR0TrunkIfSend(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork, PINTNETIF pIfSender,
2617 uint32_t fDst, PINTNETSG pSG)
2618{
2619 /*
2620 * Quick sanity check.
2621 */
2622 AssertPtr(pThis);
2623 AssertPtr(pNetwork);
2624 AssertPtr(pIfSender);
2625 AssertPtr(pSG);
2626 Assert(fDst);
2627 AssertReturnVoid(pThis->pIfPort);
2628
2629 /*
2630 * Edit the frame if we're sharing the MAC address with the host on the wire.
2631 *
2632 * If the frame is headed for both the host and the wire, we'll have to send
2633 * it to the host before making any modifications, and force the OS specific
2634 * backend to copy it. We do this by marking it as TEMP (which is always the
2635 * case right now).
2636 */
2637 if ( (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
2638 && (fDst & INTNETTRUNKDIR_WIRE))
2639 {
2640 /*
2641 * Dispatch it to the host before making changes.
2642 */
2643 if (fDst & INTNETTRUNKDIR_HOST)
2644 {
2645 Assert(pSG->fFlags & INTNETSG_FLAGS_TEMP); /* make sure copy is forced */
2646 intnetR0TrunkIfSend(pThis, pNetwork, pIfSender, INTNETTRUNKDIR_HOST, pSG);
2647 fDst &= ~INTNETTRUNKDIR_HOST;
2648 }
2649
2650 /*
2651 * Edit the source address so that it it's the same as the host.
2652 */
2653 /* ASSUME frame from IntNetR0IfSend! */
2654 AssertReturnVoid(pSG->cSegsUsed == 1);
2655 AssertReturnVoid(pSG->cbTotal >= sizeof(RTNETETHERHDR));
2656 AssertReturnVoid(pIfSender);
2657 PRTNETETHERHDR pEthHdr = (PRTNETETHERHDR)pSG->aSegs[0].pv;
2658
2659 pEthHdr->SrcMac = pThis->MacAddr;
2660
2661 /*
2662 * Deal with tags from the snooping phase.
2663 */
2664 if (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4)
2665 {
2666 /*
2667 * APR IPv4: replace hardware (MAC) addresses because these end up
2668 * in ARP caches. So, if we don't the other machiens will
2669 * send the packets to the MAC address of the guest
2670 * instead of the one of the host, which won't work on
2671 * wireless of course...
2672 */
2673 PRTNETARPIPV4 pArp = (PRTNETARPIPV4)(pEthHdr + 1);
2674 if (!memcmp(&pArp->ar_sha, &pIfSender->MacAddr, sizeof(RTMAC)))
2675 {
2676 Log6(("tw: ar_sha %.6Rhxs -> %.6Rhxs\n", &pArp->ar_sha, &pThis->MacAddr));
2677 pArp->ar_sha = pThis->MacAddr;
2678 }
2679 if (!memcmp(&pArp->ar_tha, &pIfSender->MacAddr, sizeof(RTMAC))) /* just in case... */
2680 {
2681 Log6(("tw: ar_tha %.6Rhxs -> %.6Rhxs\n", &pArp->ar_tha, &pThis->MacAddr));
2682 pArp->ar_tha = pThis->MacAddr;
2683 }
2684 }
2685 //else if (pSG->fFlags & INTNETSG_FLAGS_ICMPV6_NDP)
2686 //{ /// @todo move the editing into a different function
2687 //}
2688 }
2689
2690 /*
2691 * Send the frame, handling the GSO fallback .
2692 * .
2693 * Note! The trunk implementation will re-check that the trunk is active .
2694 * before sending, so we don't have to duplicate that effort here.
2695 */
2696 STAM_REL_PROFILE_START(&pIfSender->pIntBuf->StatSend2, a);
2697 int rc;
2698 if ( pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID
2699 || intnetR0TrunkIfCanHandleGsoFrame(pThis, pSG, fDst) )
2700 rc = pThis->pIfPort->pfnXmit(pThis->pIfPort, pIfSender->pvIfData, pSG, fDst);
2701 else
2702 rc = intnetR0TrunkIfSendGsoFallback(pThis, pIfSender, pSG, fDst);
2703 STAM_REL_PROFILE_STOP(&pIfSender->pIntBuf->StatSend2, a);
2704
2705 /** @todo failure statistics? */
2706 Log2(("intnetR0TrunkIfSend: %Rrc fDst=%d\n", rc, fDst)); NOREF(rc);
2707}
2708
2709
2710/**
2711 * Edits an ARP packet arriving from the wire via the trunk connection.
2712 *
2713 * @param pNetwork The network the frame is being sent to.
2714 * @param pSG Pointer to the gather list for the frame.
2715 * The flags and data content may be updated.
2716 * @param pEthHdr Pointer to the ethernet header. This may also be
2717 * updated if it's a unicast...
2718 */
2719static void intnetR0NetworkEditArpFromWire(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
2720{
2721 /*
2722 * Check the minimum size and get a linear copy of the thing to work on,
2723 * using the temporary buffer if necessary.
2724 */
2725 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4)))
2726 return;
2727 PRTNETARPIPV4 pArpIPv4 = (PRTNETARPIPV4)((uint8_t *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
2728 if ( pSG->cSegsUsed != 1
2729 && pSG->aSegs[0].cb < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4))
2730 {
2731 Log6(("fw: Copying ARP pkt %u\n", sizeof(RTNETARPIPV4)));
2732 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), sizeof(RTNETARPIPV4), pNetwork->pbTmp))
2733 return;
2734 pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
2735 pArpIPv4 = (PRTNETARPIPV4)pNetwork->pbTmp;
2736 }
2737
2738 /*
2739 * Ignore packets which doesn't interest us or we perceive as malformed.
2740 */
2741 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
2742 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
2743 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
2744 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
2745 return;
2746 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
2747 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
2748 && ar_oper != RTNET_ARPOP_REPLY))
2749 {
2750 Log6(("ar_oper=%#x\n", ar_oper));
2751 return;
2752 }
2753
2754 /* Tag it as ARP IPv4. */
2755 pSG->fFlags |= INTNETSG_FLAGS_ARP_IPV4;
2756
2757 /*
2758 * The thing we're interested in here is a reply to a query made by a guest
2759 * since we modified the MAC in the initial request the guest made.
2760 */
2761 if ( ar_oper == RTNET_ARPOP_REPLY
2762 && !memcmp(&pArpIPv4->ar_tha, &pNetwork->MacTab.pTrunk->MacAddr, sizeof(RTMAC)))
2763 {
2764 PINTNETIF pIf = intnetR0NetworkAddrCacheLookupIf(pNetwork, (PCRTNETADDRU)&pArpIPv4->ar_tpa,
2765 kIntNetAddrType_IPv4, sizeof(pArpIPv4->ar_tpa));
2766 if (pIf)
2767 {
2768 Log6(("fw: ar_tha %.6Rhxs -> %.6Rhxs\n", &pArpIPv4->ar_tha, &pIf->MacAddr));
2769 pArpIPv4->ar_tha = pIf->MacAddr;
2770 if (!memcmp(&pEthHdr->DstMac, &pNetwork->MacTab.pTrunk->MacAddr, sizeof(RTMAC)))
2771 {
2772 Log6(("fw: DstMac %.6Rhxs -> %.6Rhxs\n", &pEthHdr->DstMac, &pIf->MacAddr));
2773 pEthHdr->DstMac = pIf->MacAddr;
2774 if ((void *)pEthHdr != pSG->aSegs[0].pv)
2775 intnetR0SgWritePart(pSG, RT_OFFSETOF(RTNETETHERHDR, DstMac), sizeof(RTMAC), &pIf->MacAddr);
2776 }
2777 intnetR0BusyDecIf(pIf);
2778
2779 /* Write back the packet if we've been making changes to a buffered copy. */
2780 if (pSG->fFlags & INTNETSG_FLAGS_PKT_CP_IN_TMP)
2781 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR), sizeof(PRTNETARPIPV4), pArpIPv4);
2782 }
2783 }
2784}
2785
2786
2787/**
2788 * Detects and edits an DHCP packet arriving from the internal net.
2789 *
2790 * @param pNetwork The network the frame is being sent to.
2791 * @param pSG Pointer to the gather list for the frame.
2792 * The flags and data content may be updated.
2793 * @param pEthHdr Pointer to the ethernet header. This may also be
2794 * updated if it's a unicast...
2795 */
2796static void intnetR0NetworkEditDhcpFromIntNet(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
2797{
2798 /*
2799 * Check the minimum size and get a linear copy of the thing to work on,
2800 * using the temporary buffer if necessary.
2801 */
2802 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN))
2803 return;
2804 /*
2805 * Get a pointer to a linear copy of the full packet, using the
2806 * temporary buffer if necessary.
2807 */
2808 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((PCRTNETETHERHDR)pSG->aSegs[0].pv + 1);
2809 uint32_t cbPacket = pSG->cbTotal - sizeof(RTNETETHERHDR);
2810 if (pSG->cSegsUsed > 1)
2811 {
2812 cbPacket = RT_MIN(cbPacket, INTNETNETWORK_TMP_SIZE);
2813 Log6(("intnetR0NetworkEditDhcpFromIntNet: Copying IPv4/UDP/DHCP pkt %u\n", cbPacket));
2814 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
2815 return;
2816 //pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
2817 pIpHdr = (PCRTNETIPV4)pNetwork->pbTmp;
2818 }
2819
2820 /*
2821 * Validate the IP header and find the UDP packet.
2822 */
2823 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, pSG->cbTotal - sizeof(RTNETETHERHDR), true /*fCheckSum*/))
2824 {
2825 Log6(("intnetR0NetworkEditDhcpFromIntNet: bad ip header\n"));
2826 return;
2827 }
2828 size_t cbIpHdr = pIpHdr->ip_hl * 4;
2829 if ( pIpHdr->ip_p != RTNETIPV4_PROT_UDP /* DHCP is UDP. */
2830 || cbPacket < cbIpHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN) /* Min DHCP packet len */
2831 return;
2832
2833 size_t cbUdpPkt = cbPacket - cbIpHdr;
2834 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uintptr_t)pIpHdr + cbIpHdr);
2835 /* We are only interested in DHCP packets coming from client to server. */
2836 if ( RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPS
2837 || RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPC)
2838 return;
2839
2840 /*
2841 * Check if the DHCP message is valid and get the type.
2842 */
2843 if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbUdpPkt, true /*fCheckSum*/))
2844 {
2845 Log6(("intnetR0NetworkEditDhcpFromIntNet: Bad UDP packet\n"));
2846 return;
2847 }
2848 PCRTNETBOOTP pDhcp = (PCRTNETBOOTP)(pUdpHdr + 1);
2849 uint8_t MsgType;
2850 if (!RTNetIPv4IsDHCPValid(pUdpHdr, pDhcp, cbUdpPkt - sizeof(*pUdpHdr), &MsgType))
2851 {
2852 Log6(("intnetR0NetworkEditDhcpFromIntNet: Bad DHCP packet\n"));
2853 return;
2854 }
2855
2856 switch (MsgType)
2857 {
2858 case RTNET_DHCP_MT_DISCOVER:
2859 case RTNET_DHCP_MT_REQUEST:
2860 Log6(("intnetR0NetworkEditDhcpFromIntNet: Setting broadcast flag in DHCP %#x, previously %x\n", MsgType, pDhcp->bp_flags));
2861 if (!(pDhcp->bp_flags & RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST)))
2862 {
2863 /* Patch flags */
2864 uint16_t uFlags = pDhcp->bp_flags | RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST);
2865 intnetR0SgWritePart(pSG, (uintptr_t)&pDhcp->bp_flags - (uintptr_t)pIpHdr + sizeof(RTNETETHERHDR), sizeof(uFlags), &uFlags);
2866 /* Patch UDP checksum */
2867 uint32_t uChecksum = (uint32_t)~pUdpHdr->uh_sum + RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST);
2868 while (uChecksum >> 16)
2869 uChecksum = (uChecksum >> 16) + (uChecksum & 0xFFFF);
2870 uChecksum = ~uChecksum;
2871 intnetR0SgWritePart(pSG, (uintptr_t)&pUdpHdr->uh_sum - (uintptr_t)pIpHdr + sizeof(RTNETETHERHDR), sizeof(pUdpHdr->uh_sum), &uChecksum);
2872 }
2873 break;
2874 }
2875}
2876
2877
2878/**
2879 * Checks if the callers context is okay for sending to the specified
2880 * destinations.
2881 *
2882 * @returns true if it's okay, false if it isn't.
2883 * @param pNetwork The network.
2884 * @param pIfSender The interface sending or NULL if it's the trunk.
2885 * @param pDstTab The destination table.
2886 */
2887DECLINLINE(bool) intnetR0NetworkIsContextOk(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, PCINTNETDSTTAB pDstTab)
2888{
2889 /* Sending to the trunk is the problematic path. If the trunk is the
2890 sender we won't be sending to it, so no problem..
2891 Note! fTrunkDst may be set event if if the trunk is the sender. */
2892 if (!pIfSender)
2893 return true;
2894
2895 uint32_t const fTrunkDst = pDstTab->fTrunkDst;
2896 if (!fTrunkDst)
2897 return true;
2898
2899 /* ASSUMES: that the trunk won't change its report while we're checking. */
2900 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
2901 if ((fTrunkDst & pTrunk->fNoPreemptDsts) == fTrunkDst)
2902 return true;
2903
2904 /* ASSUMES: That a preemption test detects HWACCM contexts. (Will work on
2905 non-preemptive systems as well.) */
2906 if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
2907 return true;
2908 return false;
2909}
2910
2911
2912/**
2913 * Checks if the callers context is okay for doing a broadcast given the
2914 * specified source.
2915 *
2916 * @returns true if it's okay, false if it isn't.
2917 * @param pNetwork The network.
2918 * @param fSrc The source of the packet. (0 (intnet),
2919 * INTNETTRUNKDIR_HOST or INTNETTRUNKDIR_WIRE).
2920 */
2921DECLINLINE(bool) intnetR0NetworkIsContextOkForBroadcast(PINTNETNETWORK pNetwork, uint32_t fSrc)
2922{
2923 /* Sending to the trunk is the problematic path. If the trunk is the
2924 sender we won't be sending to it, so no problem. */
2925 if (fSrc)
2926 return true;
2927
2928 /* ASSUMES: That a preemption test detects HWACCM contexts. (Will work on
2929 non-preemptive systems as well.) */
2930 if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
2931 return true;
2932
2933 /* PARANOIA: Grab the spinlock to make sure the trunk structure cannot be
2934 freed while we're touching it. */
2935 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
2936 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
2937 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
2938
2939 bool fRc = !pTrunk
2940 || pTrunk->fNoPreemptDsts == (INTNETTRUNKDIR_HOST | INTNETTRUNKDIR_WIRE)
2941 || ( (!pNetwork->MacTab.fHostActive || (pTrunk->fNoPreemptDsts & INTNETTRUNKDIR_HOST) )
2942 && (!pNetwork->MacTab.fWireActive || (pTrunk->fNoPreemptDsts & INTNETTRUNKDIR_WIRE) ) );
2943
2944 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
2945
2946 return fRc;
2947}
2948
2949
2950/**
2951 * Check context, edit, snoop and switch a broadcast frame when sharing MAC
2952 * address on the wire.
2953 *
2954 * The caller must hold at least one interface on the network busy to prevent it
2955 * from destructing beath us.
2956 *
2957 * @param pNetwork The network the frame is being sent to.
2958 * @param fSrc The source of the packet. (0 (intnet),
2959 * INTNETTRUNKDIR_HOST or INTNETTRUNKDIR_WIRE).
2960 * @param pIfSender The sender interface, NULL if trunk. Used to
2961 * prevent sending an echo to the sender.
2962 * @param pSG Pointer to the gather list.
2963 * @param pEthHdr Pointer to the ethernet header.
2964 * @param pDstTab The destination output table.
2965 */
2966static INTNETSWDECISION intnetR0NetworkSharedMacFixAndSwitchBroadcast(PINTNETNETWORK pNetwork,
2967 uint32_t fSrc, PINTNETIF pIfSender,
2968 PINTNETSG pSG, PRTNETETHERHDR pEthHdr,
2969 PINTNETDSTTAB pDstTab)
2970{
2971 /*
2972 * Before doing any work here, we need to figure out if we can handle it
2973 * in the current context. The restrictions are solely on the trunk.
2974 *
2975 * Note! Since at least one interface is busy, there won't be any changes
2976 * to the parameters here (unless the trunk changes its capability
2977 * report, which it shouldn't).
2978 */
2979 if (!intnetR0NetworkIsContextOkForBroadcast(pNetwork, fSrc))
2980 return INTNETSWDECISION_BAD_CONTEXT;
2981
2982 /*
2983 * Check for ARP packets from the wire since we'll have to make
2984 * modification to them if we're sharing the MAC address with the host.
2985 */
2986 if ( (fSrc & INTNETTRUNKDIR_WIRE)
2987 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_ARP
2988 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
2989 intnetR0NetworkEditArpFromWire(pNetwork, pSG, pEthHdr);
2990
2991 /*
2992 * Check for DHCP packets from the internal net since we'll have to set
2993 * broadcast flag in DHCP requests if we're sharing the MAC address with
2994 * the host. GSO is not applicable to DHCP traffic.
2995 */
2996 if ( !fSrc
2997 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_IPV4
2998 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
2999 intnetR0NetworkEditDhcpFromIntNet(pNetwork, pSG, pEthHdr);
3000
3001 /*
3002 * Snoop address info from packet orginating from the trunk connection.
3003 */
3004 if (fSrc)
3005 {
3006#ifdef INTNET_WITH_DHCP_SNOOPING
3007 uint16_t EtherType = RT_BE2H_U16(pEthHdr->EtherType);
3008 if ( ( EtherType == RTNET_ETHERTYPE_IPV4 /* for DHCP */
3009 && pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN
3010 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID )
3011 || (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4) )
3012 intnetR0TrunkIfSnoopAddr(pNetwork, pSG, EtherType);
3013#else
3014 if (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4)
3015 intnetR0TrunkIfSnoopArp(pNetwork, pSG);
3016#endif
3017 }
3018
3019 /*
3020 * Create the broadcast destination table.
3021 */
3022 return intnetR0NetworkSwitchBroadcast(pNetwork, fSrc, pIfSender, pDstTab);
3023}
3024
3025
3026/**
3027 * Check context, snoop and switch a unicast frame using the network layer
3028 * address of the link layer one (when sharing MAC address on the wire).
3029 *
3030 * This function is only used for frames coming from the wire (trunk).
3031 *
3032 * @returns true if it's addressed to someone on the network, otherwise false.
3033 * @param pNetwork The network the frame is being sent to.
3034 * @param pSG Pointer to the gather list.
3035 * @param pEthHdr Pointer to the ethernet header.
3036 * @param pDstTab The destination output table.
3037 */
3038static INTNETSWDECISION intnetR0NetworkSharedMacFixAndSwitchUnicast(PINTNETNETWORK pNetwork, PINTNETSG pSG,
3039 PRTNETETHERHDR pEthHdr, PINTNETDSTTAB pDstTab)
3040{
3041 /*
3042 * Extract the network address from the packet.
3043 */
3044 RTNETADDRU Addr;
3045 INTNETADDRTYPE enmAddrType;
3046 uint8_t cbAddr;
3047 switch (RT_BE2H_U16(pEthHdr->EtherType))
3048 {
3049 case RTNET_ETHERTYPE_IPV4:
3050 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_dst), sizeof(Addr.IPv4), &Addr)))
3051 {
3052 Log(("intnetshareduni: failed to read ip_dst! cbTotal=%#x\n", pSG->cbTotal));
3053 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3054 }
3055 enmAddrType = kIntNetAddrType_IPv4;
3056 cbAddr = sizeof(Addr.IPv4);
3057 Log6(("intnetshareduni: IPv4 %d.%d.%d.%d\n", Addr.au8[0], Addr.au8[1], Addr.au8[2], Addr.au8[3]));
3058 break;
3059
3060#if 0 /** @todo IntNet: implement IPv6 for wireless MAC sharing. */
3061 case RTNET_ETHERTYPE_IPV6
3062 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV6, ip6_dst), sizeof(Addr.IPv6), &Addr)))
3063 {
3064 Log(("intnetshareduni: failed to read ip6_dst! cbTotal=%#x\n", pSG->cbTotal));
3065 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3066 }
3067 enmAddrType = kIntNetAddrType_IPv6;
3068 cbAddr = sizeof(Addr.IPv6);
3069 break;
3070#endif
3071#if 0 /** @todo IntNet: implement IPX for wireless MAC sharing? */
3072 case RTNET_ETHERTYPE_IPX_1:
3073 case RTNET_ETHERTYPE_IPX_2:
3074 case RTNET_ETHERTYPE_IPX_3:
3075 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPX, ipx_dstnet), sizeof(Addr.IPX), &Addr)))
3076 {
3077 Log(("intnetshareduni: failed to read ipx_dstnet! cbTotal=%#x\n", pSG->cbTotal));
3078 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3079 }
3080 enmAddrType = kIntNetAddrType_IPX;
3081 cbAddr = sizeof(Addr.IPX);
3082 break;
3083#endif
3084
3085 /*
3086 * Treat ARP as broadcast (it shouldn't end up here normally,
3087 * so it goes last in the switch).
3088 */
3089 case RTNET_ETHERTYPE_ARP:
3090 Log6(("intnetshareduni: ARP\n"));
3091 /** @todo revisit this broadcasting of unicast ARP frames! */
3092 return intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, INTNETTRUNKDIR_WIRE, NULL, pSG, pEthHdr, pDstTab);
3093
3094 /*
3095 * Unknown packets are sent to the trunk and any promiscuous interfaces.
3096 */
3097 default:
3098 {
3099 Log6(("intnetshareduni: unknown ethertype=%#x\n", RT_BE2H_U16(pEthHdr->EtherType)));
3100 return intnetR0NetworkSwitchTrunkAndPromisc(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3101 }
3102 }
3103
3104 /*
3105 * Do level-3 switching.
3106 */
3107 INTNETSWDECISION enmSwDecision = intnetR0NetworkSwitchLevel3(pNetwork, &pEthHdr->DstMac,
3108 enmAddrType, &Addr, cbAddr,
3109 INTNETTRUNKDIR_WIRE, pDstTab);
3110
3111#ifdef INTNET_WITH_DHCP_SNOOPING
3112 /*
3113 * Perform DHCP snooping. GSO is not applicable to DHCP traffic
3114 */
3115 if ( enmAddrType == kIntNetAddrType_IPv4
3116 && pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN
3117 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3118 intnetR0TrunkIfSnoopAddr(pNetwork, pSG, RT_BE2H_U16(pEthHdr->EtherType));
3119#endif /* INTNET_WITH_DHCP_SNOOPING */
3120
3121 return enmSwDecision;
3122}
3123
3124
3125/**
3126 * Release all the interfaces in the destination table when we realize that
3127 * we're in a context where we cannot get the job done.
3128 *
3129 * @param pNetwork The network.
3130 * @param pDstTab The destination table.
3131 */
3132static void intnetR0NetworkReleaseDstTab(PINTNETNETWORK pNetwork, PINTNETDSTTAB pDstTab)
3133{
3134 /* The trunk interface. */
3135 if (pDstTab->fTrunkDst)
3136 {
3137 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3138 intnetR0BusyDec(pNetwork, &pTrunk->cBusy);
3139 pDstTab->pTrunk = NULL;
3140 pDstTab->fTrunkDst = 0;
3141 }
3142
3143 /* Regular interfaces. */
3144 uint32_t iIf = pDstTab->cIfs;
3145 while (iIf-- > 0)
3146 {
3147 PINTNETIF pIf = pDstTab->aIfs[iIf].pIf;
3148 intnetR0BusyDecIf(pIf);
3149 pDstTab->aIfs[iIf].pIf = NULL;
3150 }
3151 pDstTab->cIfs = 0;
3152}
3153
3154
3155/**
3156 * Deliver the frame to the interfaces specified in the destination table.
3157 *
3158 * @param pNetwork The network.
3159 * @param pDstTab The destination table.
3160 * @param pSG The frame to send.
3161 * @param pIfSender The sender interface. NULL if it origined via
3162 * the trunk.
3163 */
3164static void intnetR0NetworkDeliver(PINTNETNETWORK pNetwork, PINTNETDSTTAB pDstTab, PINTNETSG pSG, PINTNETIF pIfSender)
3165{
3166 /*
3167 * Do the interfaces first before sending it to the wire and risk having to
3168 * modify it.
3169 */
3170 uint32_t iIf = pDstTab->cIfs;
3171 while (iIf-- > 0)
3172 {
3173 PINTNETIF pIf = pDstTab->aIfs[iIf].pIf;
3174 intnetR0IfSend(pIf, pIfSender, pSG,
3175 pDstTab->aIfs[iIf].fReplaceDstMac ? &pIf->MacAddr: NULL);
3176 intnetR0BusyDecIf(pIf);
3177 pDstTab->aIfs[iIf].pIf = NULL;
3178 }
3179 pDstTab->cIfs = 0;
3180
3181 /*
3182 * Send to the trunk.
3183 *
3184 * Note! The switching functions will include the trunk even when the frame
3185 * source is the trunk. This is because we need it to figure out
3186 * whether the other half of the trunk should see the frame or not
3187 * and let the caller know.
3188 *
3189 * So, we'll ignore trunk sends here if the frame origin is
3190 * INTNETTRUNKSWPORT::pfnRecv.
3191 */
3192 if (pDstTab->fTrunkDst)
3193 {
3194 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3195 if (pIfSender)
3196 intnetR0TrunkIfSend(pTrunk, pNetwork, pIfSender, pDstTab->fTrunkDst, pSG);
3197 intnetR0BusyDec(pNetwork, &pTrunk->cBusy);
3198 pDstTab->pTrunk = NULL;
3199 pDstTab->fTrunkDst = 0;
3200 }
3201}
3202
3203
3204/**
3205 * Sends a frame.
3206 *
3207 * This function will distribute the frame to the interfaces it is addressed to.
3208 * It will also update the MAC address of the sender.
3209 *
3210 * The caller must own the network mutex.
3211 *
3212 * @returns The switching decision.
3213 * @param pNetwork The network the frame is being sent to.
3214 * @param pIfSender The interface sending the frame. This is NULL if it's the trunk.
3215 * @param fSrc The source flags. This 0 if it's not from the trunk.
3216 * @param pSG Pointer to the gather list.
3217 * @param pDstTab The destination table to use.
3218 */
3219static INTNETSWDECISION intnetR0NetworkSend(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, uint32_t fSrc,
3220 PINTNETSG pSG, PINTNETDSTTAB pDstTab)
3221{
3222 /*
3223 * Assert reality.
3224 */
3225 AssertPtr(pNetwork);
3226 AssertPtrNull(pIfSender);
3227 Assert(pIfSender ? fSrc == 0 : fSrc != 0);
3228 Assert(!pIfSender || pNetwork == pIfSender->pNetwork);
3229 AssertPtr(pSG);
3230 Assert(pSG->cSegsUsed >= 1);
3231 Assert(pSG->cSegsUsed <= pSG->cSegsAlloc);
3232 if (pSG->cbTotal < sizeof(RTNETETHERHDR))
3233 return INTNETSWDECISION_INVALID;
3234
3235 /*
3236 * Get the ethernet header (might theoretically involve multiple segments).
3237 */
3238 RTNETETHERHDR EthHdr;
3239 if (pSG->aSegs[0].cb >= sizeof(EthHdr))
3240 EthHdr = *(PCRTNETETHERHDR)pSG->aSegs[0].pv;
3241 else if (!intnetR0SgReadPart(pSG, 0, sizeof(EthHdr), &EthHdr))
3242 return INTNETSWDECISION_INVALID;
3243 if ( (EthHdr.DstMac.au8[0] == 0x08 && EthHdr.DstMac.au8[1] == 0x00 && EthHdr.DstMac.au8[2] == 0x27)
3244 || (EthHdr.SrcMac.au8[0] == 0x08 && EthHdr.SrcMac.au8[1] == 0x00 && EthHdr.SrcMac.au8[2] == 0x27)
3245 || (EthHdr.DstMac.au8[0] == 0x00 && EthHdr.DstMac.au8[1] == 0x16 && EthHdr.DstMac.au8[2] == 0xcb)
3246 || (EthHdr.SrcMac.au8[0] == 0x00 && EthHdr.SrcMac.au8[1] == 0x16 && EthHdr.SrcMac.au8[2] == 0xcb)
3247 || EthHdr.DstMac.au8[0] == 0xff
3248 || EthHdr.SrcMac.au8[0] == 0xff)
3249 Log2(("D=%.6Rhxs S=%.6Rhxs T=%04x f=%x z=%x\n",
3250 &EthHdr.DstMac, &EthHdr.SrcMac, RT_BE2H_U16(EthHdr.EtherType), fSrc, pSG->cbTotal));
3251
3252 /*
3253 * Learn the MAC address of the sender. No re-learning as the interface
3254 * user will normally tell us the right MAC address.
3255 *
3256 * Note! We don't notify the trunk about these mainly because of the
3257 * problematic contexts we might be called in.
3258 */
3259 if (RT_UNLIKELY( pIfSender
3260 && !pIfSender->fMacSet
3261 && memcmp(&EthHdr.SrcMac, &pIfSender->MacAddr, sizeof(pIfSender->MacAddr))
3262 && !intnetR0IsMacAddrMulticast(&EthHdr.SrcMac)
3263 ))
3264 {
3265 Log2(("IF MAC: %.6Rhxs -> %.6Rhxs\n", &pIfSender->MacAddr, &EthHdr.SrcMac));
3266 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
3267 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
3268
3269 PINTNETMACTABENTRY pIfEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIfSender);
3270 if (pIfEntry)
3271 pIfEntry->MacAddr = EthHdr.SrcMac;
3272 pIfSender->MacAddr = EthHdr.SrcMac;
3273
3274 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
3275 }
3276
3277 /*
3278 * Deal with MAC address sharing as that may required editing of the
3279 * packets before we dispatch them anywhere.
3280 */
3281 INTNETSWDECISION enmSwDecision;
3282 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
3283 {
3284 if (intnetR0IsMacAddrMulticast(&EthHdr.DstMac))
3285 enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, fSrc, pIfSender, pSG, &EthHdr, pDstTab);
3286 else if (fSrc & INTNETTRUNKDIR_WIRE)
3287 enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchUnicast(pNetwork, pSG, &EthHdr, pDstTab);
3288 else
3289 enmSwDecision = intnetR0NetworkSwitchUnicast(pNetwork, fSrc, pIfSender, &EthHdr.DstMac, pDstTab);
3290 }
3291 else if (intnetR0IsMacAddrMulticast(&EthHdr.DstMac))
3292 enmSwDecision = intnetR0NetworkSwitchBroadcast(pNetwork, fSrc, pIfSender, pDstTab);
3293 else
3294 enmSwDecision = intnetR0NetworkSwitchUnicast(pNetwork, fSrc, pIfSender, &EthHdr.DstMac, pDstTab);
3295
3296 /*
3297 * Deliver to the destinations if we can.
3298 */
3299 if (enmSwDecision != INTNETSWDECISION_BAD_CONTEXT)
3300 {
3301 if (intnetR0NetworkIsContextOk(pNetwork, pIfSender, pDstTab))
3302 intnetR0NetworkDeliver(pNetwork, pDstTab, pSG, pIfSender);
3303 else
3304 {
3305 intnetR0NetworkReleaseDstTab(pNetwork, pDstTab);
3306 enmSwDecision = INTNETSWDECISION_BAD_CONTEXT;
3307 }
3308 }
3309
3310 return enmSwDecision;
3311}
3312
3313
3314/**
3315 * Sends one or more frames.
3316 *
3317 * The function will first the frame which is passed as the optional arguments
3318 * pvFrame and cbFrame. These are optional since it also possible to chain
3319 * together one or more frames in the send buffer which the function will
3320 * process after considering it's arguments.
3321 *
3322 * The caller is responsible for making sure that there are no concurrent calls
3323 * to this method (with the same handle).
3324 *
3325 * @returns VBox status code.
3326 * @param hIf The interface handle.
3327 * @param pSession The caller's session.
3328 */
3329INTNETR0DECL(int) IntNetR0IfSend(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession)
3330{
3331 Log5(("IntNetR0IfSend: hIf=%RX32\n", hIf));
3332
3333 /*
3334 * Validate input and translate the handle.
3335 */
3336 PINTNET pIntNet = g_pIntNet;
3337 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
3338 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
3339
3340 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
3341 if (!pIf)
3342 return VERR_INVALID_HANDLE;
3343 STAM_REL_PROFILE_START(&pIf->pIntBuf->StatSend1, a);
3344
3345 /*
3346 * Make sure we've got a network.
3347 */
3348 int rc = VINF_SUCCESS;
3349 intnetR0BusyIncIf(pIf);
3350 PINTNETNETWORK pNetwork = pIf->pNetwork;
3351 if (RT_LIKELY(pNetwork))
3352 {
3353 /*
3354 * Grab the destination table.
3355 */
3356 PINTNETDSTTAB pDstTab = ASMAtomicXchgPtrT(&pIf->pDstTab, NULL, PINTNETDSTTAB);
3357 if (RT_LIKELY(pDstTab))
3358 {
3359 /*
3360 * Process the send buffer.
3361 */
3362 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_BROADCAST;
3363 INTNETSG Sg; /** @todo this will have to be changed if we're going to use async sending
3364 * with buffer sharing for some OS or service. Darwin copies everything so
3365 * I won't bother allocating and managing SGs rigth now. Sorry. */
3366 PINTNETHDR pHdr;
3367 while ((pHdr = IntNetRingGetNextFrameToRead(&pIf->pIntBuf->Send)) != NULL)
3368 {
3369 uint16_t const u16Type = pHdr->u16Type;
3370 if (u16Type == INTNETHDR_TYPE_FRAME)
3371 {
3372 /* Send regular frame. */
3373 void *pvCurFrame = IntNetHdrGetFramePtr(pHdr, pIf->pIntBuf);
3374 IntNetSgInitTemp(&Sg, pvCurFrame, pHdr->cbFrame);
3375 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
3376 intnetR0IfSnoopAddr(pIf, (uint8_t *)pvCurFrame, pHdr->cbFrame, false /*fGso*/, (uint16_t *)&Sg.fFlags);
3377 enmSwDecision = intnetR0NetworkSend(pNetwork, pIf, 0 /*fSrc*/, &Sg, pDstTab);
3378 }
3379 else if (u16Type == INTNETHDR_TYPE_GSO)
3380 {
3381 /* Send GSO frame if sane. */
3382 PPDMNETWORKGSO pGso = IntNetHdrGetGsoContext(pHdr, pIf->pIntBuf);
3383 uint32_t cbFrame = pHdr->cbFrame - sizeof(*pGso);
3384 if (RT_LIKELY(PDMNetGsoIsValid(pGso, pHdr->cbFrame, cbFrame)))
3385 {
3386 void *pvCurFrame = pGso + 1;
3387 IntNetSgInitTempGso(&Sg, pvCurFrame, cbFrame, pGso);
3388 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
3389 intnetR0IfSnoopAddr(pIf, (uint8_t *)pvCurFrame, cbFrame, true /*fGso*/, (uint16_t *)&Sg.fFlags);
3390 enmSwDecision = intnetR0NetworkSend(pNetwork, pIf, 0 /*fSrc*/, &Sg, pDstTab);
3391 }
3392 else
3393 {
3394 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatBadFrames); /* ignore */
3395 enmSwDecision = INTNETSWDECISION_DROP;
3396 }
3397 }
3398 /* Unless it's a padding frame, we're getting babble from the producer. */
3399 else
3400 {
3401 if (u16Type != INTNETHDR_TYPE_PADDING)
3402 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatBadFrames); /* ignore */
3403 enmSwDecision = INTNETSWDECISION_DROP;
3404 }
3405 if (enmSwDecision == INTNETSWDECISION_BAD_CONTEXT)
3406 {
3407 rc = VERR_TRY_AGAIN;
3408 break;
3409 }
3410
3411 /* Skip to the next frame. */
3412 IntNetRingSkipFrame(&pIf->pIntBuf->Send);
3413 }
3414
3415 /*
3416 * Put back the destination table.
3417 */
3418 Assert(!pIf->pDstTab);
3419 ASMAtomicWritePtr(&pIf->pDstTab, pDstTab);
3420 }
3421 else
3422 rc = VERR_INTERNAL_ERROR_4;
3423 }
3424 else
3425 rc = VERR_INTERNAL_ERROR_3;
3426
3427 /*
3428 * Release the interface.
3429 */
3430 intnetR0BusyDecIf(pIf);
3431 STAM_REL_PROFILE_STOP(&pIf->pIntBuf->StatSend1, a);
3432 intnetR0IfRelease(pIf, pSession);
3433 return rc;
3434}
3435
3436
3437/**
3438 * VMMR0 request wrapper for IntNetR0IfSend.
3439 *
3440 * @returns see IntNetR0IfSend.
3441 * @param pSession The caller's session.
3442 * @param pReq The request packet.
3443 */
3444INTNETR0DECL(int) IntNetR0IfSendReq(PSUPDRVSESSION pSession, PINTNETIFSENDREQ pReq)
3445{
3446 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
3447 return VERR_INVALID_PARAMETER;
3448 return IntNetR0IfSend(pReq->hIf, pSession);
3449}
3450
3451
3452/**
3453 * Maps the default buffer into ring 3.
3454 *
3455 * @returns VBox status code.
3456 * @param hIf The interface handle.
3457 * @param pSession The caller's session.
3458 * @param ppRing3Buf Where to store the address of the ring-3 mapping
3459 * (optional).
3460 * @param ppRing0Buf Where to store the address of the ring-0 mapping
3461 * (optional).
3462 */
3463INTNETR0DECL(int) IntNetR0IfGetBufferPtrs(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession,
3464 R3PTRTYPE(PINTNETBUF) *ppRing3Buf, R0PTRTYPE(PINTNETBUF) *ppRing0Buf)
3465{
3466 LogFlow(("IntNetR0IfGetBufferPtrs: hIf=%RX32 ppRing3Buf=%p ppRing0Buf=%p\n", hIf, ppRing3Buf, ppRing0Buf));
3467
3468 /*
3469 * Validate input.
3470 */
3471 PINTNET pIntNet = g_pIntNet;
3472 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
3473 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
3474
3475 AssertPtrNullReturn(ppRing3Buf, VERR_INVALID_PARAMETER);
3476 AssertPtrNullReturn(ppRing0Buf, VERR_INVALID_PARAMETER);
3477 if (ppRing3Buf)
3478 *ppRing3Buf = 0;
3479 if (ppRing0Buf)
3480 *ppRing0Buf = 0;
3481
3482 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
3483 if (!pIf)
3484 return VERR_INVALID_HANDLE;
3485
3486 /*
3487 * ASSUMES that only the process that created an interface can use it.
3488 * ASSUMES that we created the ring-3 mapping when selecting or
3489 * allocating the buffer.
3490 */
3491 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
3492 if (RT_SUCCESS(rc))
3493 {
3494 if (ppRing3Buf)
3495 *ppRing3Buf = pIf->pIntBufR3;
3496 if (ppRing0Buf)
3497 *ppRing0Buf = (R0PTRTYPE(PINTNETBUF))pIf->pIntBuf; /* tstIntNetR0 mess */
3498
3499 rc = RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
3500 }
3501
3502 intnetR0IfRelease(pIf, pSession);
3503 LogFlow(("IntNetR0IfGetBufferPtrs: returns %Rrc *ppRing3Buf=%p *ppRing0Buf=%p\n",
3504 rc, ppRing3Buf ? *ppRing3Buf : NULL, ppRing0Buf ? *ppRing0Buf : NULL));
3505 return rc;
3506}
3507
3508
3509/**
3510 * VMMR0 request wrapper for IntNetR0IfGetBufferPtrs.
3511 *
3512 * @returns see IntNetR0IfGetRing3Buffer.
3513 * @param pSession The caller's session.
3514 * @param pReq The request packet.
3515 */
3516INTNETR0DECL(int) IntNetR0IfGetBufferPtrsReq(PSUPDRVSESSION pSession, PINTNETIFGETBUFFERPTRSREQ pReq)
3517{
3518 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
3519 return VERR_INVALID_PARAMETER;
3520 return IntNetR0IfGetBufferPtrs(pReq->hIf, pSession, &pReq->pRing3Buf, &pReq->pRing0Buf);
3521}
3522
3523
3524#if 0
/* NOTE(review): this function is compiled out.  As written it references
 * pSession, which is not among its parameters, so it would not build if
 * enabled without further changes. */
3525/**
3526 * Gets the physical addresses of the default interface buffer.
 *
 * Unfinished: nothing is stored in paPages yet; once input validation and the
 * handle lookup succeed the function always returns VERR_NOT_IMPLEMENTED.
3527 *
3528 * @returns VBox status code (currently VERR_NOT_IMPLEMENTED, or a validation
 *          / handle lookup failure).
3529 * @param hIf The interface handle.
3530 * @param paPages Where to store the addresses. (The reserved fields will be set to zero.)
3531 * @param cPages Presumably the number of entries in paPages - TODO confirm.
3532 */
3533INTNETR0DECL(int) IntNetR0IfGetPhysBuffer(INTNETIFHANDLE hIf, PSUPPAGE paPages, unsigned cPages)
3534{
3535 /*
3536 * Validate input.
3537 */
3538 PINTNET pIntNet = g_pIntNet;
3539 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
3540 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
3541
3542 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
 /* Checks the address of the last byte of the array as well. */
3543 AssertPtrReturn((uint8_t *)&paPages[cPages] - 1, VERR_INVALID_PARAMETER);
3544 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
3545 if (!pIf)
3546 return VERR_INVALID_HANDLE;
3547
3548 /*
3549 * Grab the lock and get the data.
3550 * ASSUMES that the handle isn't closed while we're here.
3551 */
3552 int rc = RTSemFastMutexRequest(pIf->pNetwork->FastMutex);
3553 if (RT_SUCCESS(rc))
3554 {
3555 /** @todo make a SUPR0 api for obtaining the array. SUPR0/IPRT is keeping track of everything, there
3556 * is no need for any extra bookkeeping here.. */
3557
3558 rc = RTSemFastMutexRelease(pIf->pNetwork->FastMutex);
3559 }
3560 intnetR0IfRelease(pIf, pSession);
 /* rc is ignored; the function isn't implemented yet. */
3561 return VERR_NOT_IMPLEMENTED;
3562}
3563#endif
3564
3565
3566/**
3567 * Sets the promiscuous mode property of an interface.
3568 *
3569 * @returns VBox status code.
3570 * @param hIf The interface handle.
3571 * @param pSession The caller's session.
3572 * @param fPromiscuous Set if the interface should be in promiscuous mode, clear if not.
3573 */
3574INTNETR0DECL(int) IntNetR0IfSetPromiscuousMode(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fPromiscuous)
3575{
3576 LogFlow(("IntNetR0IfSetPromiscuousMode: hIf=%RX32 fPromiscuous=%d\n", hIf, fPromiscuous));
3577
3578 /*
3579 * Validate & translate input.
3580 */
3581 PINTNET pIntNet = g_pIntNet;
3582 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
3583 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
3584
3585 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
3586 if (!pIf)
3587 {
3588 Log(("IntNetR0IfSetPromiscuousMode: returns VERR_INVALID_HANDLE\n"));
3589 return VERR_INVALID_HANDLE;
3590 }
3591
3592 /*
3593 * Get the network, take the address spinlock, and make the change.
3594 * Paranoia^2: Mark ourselves busy to prevent anything from being destroyed.
3595 */
3596 int rc = VINF_SUCCESS;
3597 intnetR0BusyIncIf(pIf);
3598 PINTNETNETWORK pNetwork = pIf->pNetwork;
3599 if (pNetwork)
3600 {
3601 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
3602 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
3603
3604 if (pIf->fPromiscuous != fPromiscuous)
3605 {
3606 Log(("IntNetR0IfSetPromiscuousMode: hIf=%RX32: Changed from %d -> %d\n",
3607 hIf, !fPromiscuous, !!fPromiscuous));
3608 ASMAtomicUoWriteBool(&pIf->fPromiscuous, fPromiscuous);
3609
3610 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
3611 if (RT_LIKELY(pEntry))
3612 pEntry->fPromiscuous = fPromiscuous;
3613 pIf->fPromiscuous = fPromiscuous;
3614 }
3615
3616 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
3617 }
3618 else
3619 rc = VERR_WRONG_ORDER;
3620
3621 intnetR0BusyDecIf(pIf);
3622 intnetR0IfRelease(pIf, pSession);
3623 return rc;
3624}
3625
3626
3627/**
3628 * VMMR0 request wrapper for IntNetR0IfSetPromiscuousMode.
3629 *
3630 * @returns see IntNetR0IfSetPromiscuousMode.
3631 * @param pSession The caller's session.
3632 * @param pReq The request packet.
3633 */
3634INTNETR0DECL(int) IntNetR0IfSetPromiscuousModeReq(PSUPDRVSESSION pSession, PINTNETIFSETPROMISCUOUSMODEREQ pReq)
3635{
3636 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
3637 return VERR_INVALID_PARAMETER;
3638 return IntNetR0IfSetPromiscuousMode(pReq->hIf, pSession, pReq->fPromiscuous);
3639}
3640
3641
3642/**
3643 * Sets the MAC address of an interface.
3644 *
3645 * @returns VBox status code.
3646 * @param hIf The interface handle.
3647 * @param pSession The caller's session.
3648 * @param pMAC The new MAC address.
3649 */
3650INTNETR0DECL(int) IntNetR0IfSetMacAddress(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, PCRTMAC pMac)
3651{
3652 LogFlow(("IntNetR0IfSetMacAddress: hIf=%RX32 pMac=%p:{%.6Rhxs}\n", hIf, pMac, pMac));
3653
3654 /*
3655 * Validate & translate input.
3656 */
3657 PINTNET pIntNet = g_pIntNet;
3658 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
3659 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
3660
3661 AssertPtrReturn(pMac, VERR_INVALID_PARAMETER);
3662 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
3663 if (!pIf)
3664 {
3665 Log(("IntNetR0IfSetMacAddress: returns VERR_INVALID_HANDLE\n"));
3666 return VERR_INVALID_HANDLE;
3667 }
3668
3669 /*
3670 * Get the network, take the address spinlock, and make the change.
3671 * Paranoia^2: Mark ourselves busy to prevent anything from being destroyed.
3672 */
3673 int rc = VINF_SUCCESS;
3674 intnetR0BusyIncIf(pIf);
3675 PINTNETNETWORK pNetwork = pIf->pNetwork;
3676 if (pNetwork)
3677 {
3678 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
3679 PINTNETTRUNKIF pTrunk = NULL;
3680
3681 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
3682
3683 if (memcmp(&pIf->MacAddr, pMac, sizeof(pIf->MacAddr)))
3684 {
3685 Log(("IntNetR0IfSetMacAddress: hIf=%RX32: Changed from %.6Rhxs -> %.6Rhxs\n",
3686 hIf, &pIf->MacAddr, pMac));
3687
3688 /* Update the two copies. */
3689 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
3690 if (RT_LIKELY(pEntry))
3691 pEntry->MacAddr = *pMac;
3692 pIf->MacAddr = *pMac;
3693 pIf->fMacSet = true;
3694
3695 /* Grab a busy reference to the trunk so we release the lock before notifying it. */
3696 pTrunk = pNetwork->MacTab.pTrunk;
3697 if (pTrunk)
3698 intnetR0BusyIncTrunk(pTrunk);
3699 }
3700
3701 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
3702
3703 if (pTrunk)
3704 {
3705 Log(("IntNetR0IfSetMacAddress: pfnNotifyMacAddress hIf=%RX32\n", hIf));
3706 PINTNETTRUNKIFPORT pIfPort = pTrunk->pIfPort;
3707 if (pIfPort)
3708 pIfPort->pfnNotifyMacAddress(pIfPort, pIf->pvIfData, pMac);
3709 intnetR0BusyDecTrunk(pTrunk);
3710 }
3711 }
3712 else
3713 rc = VERR_WRONG_ORDER;
3714
3715 intnetR0BusyDecIf(pIf);
3716 intnetR0IfRelease(pIf, pSession);
3717 return rc;
3718}
3719
3720
3721/**
3722 * VMMR0 request wrapper for IntNetR0IfSetMacAddress.
3723 *
3724 * @returns see IntNetR0IfSetMacAddress.
3725 * @param pSession The caller's session.
3726 * @param pReq The request packet.
3727 */
3728INTNETR0DECL(int) IntNetR0IfSetMacAddressReq(PSUPDRVSESSION pSession, PINTNETIFSETMACADDRESSREQ pReq)
3729{
3730 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
3731 return VERR_INVALID_PARAMETER;
3732 return IntNetR0IfSetMacAddress(pReq->hIf, pSession, &pReq->Mac);
3733}
3734
3735
3736/**
3737 * Worker for intnetR0IfSetActive and intnetR0IfDestruct.
3738 *
3739 * This function will update the active interface count on the network and
3740 * activate or deactivate the trunk connection if necessary.
3741 *
3742 * The call must own the giant lock (we cannot take it here).
3743 *
3744 * @returns VBox status code.
3745 * @param pNetwork The network.
3746 * @param fIf The interface.
3747 * @param fActive What to do.
3748 */
3749static int intnetR0NetworkSetIfActive(PINTNETNETWORK pNetwork, PINTNETIF pIf, bool fActive)
3750{
3751 /* quick santiy check */
3752 AssertPtr(pNetwork);
3753 AssertPtr(pIf);
3754
3755 /*
3756 * The address spinlock of the network protects the variables, while the
3757 * big lock protects the calling of pfnSetState. Grab both lock at once
3758 * to save us the extra hazzle.
3759 */
3760 PINTNETTRUNKIF pTrunk = NULL;
3761 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
3762 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
3763
3764 /*
3765 * Do the update.
3766 */
3767 if (pIf->fActive != fActive)
3768 {
3769 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
3770 if (RT_LIKELY(pEntry))
3771 {
3772 pEntry->fActive = fActive;
3773 pIf->fActive = fActive;
3774
3775 if (fActive)
3776 {
3777 pNetwork->cActiveIFs++;
3778 if (pNetwork->cActiveIFs == 1)
3779 {
3780 pTrunk = pNetwork->MacTab.pTrunk;
3781 if (pTrunk)
3782 {
3783 pNetwork->MacTab.fHostActive = true;
3784 pNetwork->MacTab.fWireActive = true;
3785 }
3786 }
3787 }
3788 else
3789 {
3790 pNetwork->cActiveIFs--;
3791 if (pNetwork->cActiveIFs == 0)
3792 {
3793 pTrunk = pNetwork->MacTab.pTrunk;
3794 pNetwork->MacTab.fHostActive = false;
3795 pNetwork->MacTab.fWireActive = false;
3796 }
3797 }
3798 }
3799 }
3800
3801 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
3802
3803 /*
3804 * Tell the trunk if necessary.
3805 * The wait for !busy is for the Solaris streams trunk driver (mostly).
3806 */
3807 if (pTrunk && pTrunk->pIfPort)
3808 {
3809 if (!fActive)
3810 intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
3811
3812 pTrunk->pIfPort->pfnSetState(pTrunk->pIfPort, fActive ? INTNETTRUNKIFSTATE_ACTIVE : INTNETTRUNKIFSTATE_INACTIVE);
3813 }
3814
3815 return VINF_SUCCESS;
3816}
3817
3818
3819/**
3820 * Sets the active property of an interface.
3821 *
3822 * @returns VBox status code.
3823 * @param hIf The interface handle.
3824 * @param pSession The caller's session.
3825 * @param fActive The new state.
3826 */
3827INTNETR0DECL(int) IntNetR0IfSetActive(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fActive)
3828{
3829 LogFlow(("IntNetR0IfSetActive: hIf=%RX32 fActive=%RTbool\n", hIf, fActive));
3830
3831 /*
3832 * Validate & translate input.
3833 */
3834 PINTNET pIntNet = g_pIntNet;
3835 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
3836 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
3837
3838 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
3839 if (!pIf)
3840 {
3841 Log(("IntNetR0IfSetActive: returns VERR_INVALID_HANDLE\n"));
3842 return VERR_INVALID_HANDLE;
3843 }
3844
3845 /*
3846 * Hand it to the network since it might involve the trunk and things are
3847 * tricky there wrt to locking order.
3848 *
3849 * 1. We take the giant lock here. This makes sure nobody is re-enabling
3850 * the network while we're pausing it and vice versa. This also enables
3851 * us to wait for the network to become idle before telling the trunk.
3852 * (Important on Solaris.)
3853 *
3854 * 2. For paranoid reasons, we grab a busy reference to the calling
3855 * interface. This is totally unnecessary but should hurt (when done
3856 * after grabbing the giant lock).
3857 */
3858 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
3859 if (RT_SUCCESS(rc))
3860 {
3861 intnetR0BusyIncIf(pIf);
3862
3863 PINTNETNETWORK pNetwork = pIf->pNetwork;
3864 if (pNetwork)
3865 rc = intnetR0NetworkSetIfActive(pNetwork, pIf, fActive);
3866 else
3867 rc = VERR_WRONG_ORDER;
3868
3869 intnetR0BusyDecIf(pIf);
3870 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
3871 }
3872
3873 intnetR0IfRelease(pIf, pSession);
3874 LogFlow(("IntNetR0IfSetActive: returns %Rrc\n", rc));
3875 return rc;
3876}
3877
3878
3879/**
3880 * VMMR0 request wrapper for IntNetR0IfSetActive.
3881 *
3882 * @returns see IntNetR0IfSetActive.
3883 * @param pIntNet The internal networking instance.
3884 * @param pSession The caller's session.
3885 * @param pReq The request packet.
3886 */
3887INTNETR0DECL(int) IntNetR0IfSetActiveReq(PSUPDRVSESSION pSession, PINTNETIFSETACTIVEREQ pReq)
3888{
3889 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
3890 return VERR_INVALID_PARAMETER;
3891 return IntNetR0IfSetActive(pReq->hIf, pSession, pReq->fActive);
3892}
3893
3894
3895/**
3896 * Wait for the interface to get signaled.
3897 * The interface will be signaled when is put into the receive buffer.
3898 *
3899 * @returns VBox status code.
3900 * @param hIf The interface handle.
3901 * @param pSession The caller's session.
3902 * @param cMillies Number of milliseconds to wait. RT_INDEFINITE_WAIT should be
3903 * used if indefinite wait is desired.
3904 */
3905INTNETR0DECL(int) IntNetR0IfWait(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, uint32_t cMillies)
3906{
3907 Log4(("IntNetR0IfWait: hIf=%RX32 cMillies=%u\n", hIf, cMillies));
3908
3909 /*
3910 * Get and validate essential handles.
3911 */
3912 PINTNET pIntNet = g_pIntNet;
3913 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
3914 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
3915
3916 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
3917 if (!pIf)
3918 {
3919 Log(("IntNetR0IfWait: returns VERR_INVALID_HANDLE\n"));
3920 return VERR_INVALID_HANDLE;
3921 }
3922
3923 const INTNETIFHANDLE hIfSelf = pIf->hIf;
3924 const RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
3925 const bool fDestroying = ASMAtomicReadBool(&pIf->fDestroying);
3926 if ( hIfSelf != hIf /* paranoia */
3927 || hRecvEvent == NIL_RTSEMEVENT
3928 || fDestroying
3929 )
3930 {
3931 Log(("IntNetR0IfWait: returns VERR_SEM_DESTROYED\n"));
3932 return VERR_SEM_DESTROYED;
3933 }
3934
3935 /*
3936 * It is tempting to check if there is data to be read here,
3937 * but the problem with such an approach is that it will cause
3938 * one unnecessary supervisor->user->supervisor trip. There is
3939 * already a slight risk for such, so no need to increase it.
3940 */
3941
3942 /*
3943 * Increment the number of waiters before starting the wait.
3944 * Upon wakeup we must assert reality, checking that we're not
3945 * already destroyed or in the process of being destroyed. This
3946 * code must be aligned with the waiting code in intnetR0IfDestruct.
3947 */
3948 ASMAtomicIncU32(&pIf->cSleepers);
3949 int rc = RTSemEventWaitNoResume(hRecvEvent, cMillies);
3950 if (pIf->hRecvEvent == hRecvEvent)
3951 {
3952 ASMAtomicDecU32(&pIf->cSleepers);
3953 if (!pIf->fDestroying)
3954 {
3955 if (intnetR0IfRelease(pIf, pSession))
3956 rc = VERR_SEM_DESTROYED;
3957 }
3958 else
3959 rc = VERR_SEM_DESTROYED;
3960 }
3961 else
3962 rc = VERR_SEM_DESTROYED;
3963 Log4(("IntNetR0IfWait: returns %Rrc\n", rc));
3964 return rc;
3965}
3966
3967
3968/**
3969 * VMMR0 request wrapper for IntNetR0IfWait.
3970 *
3971 * @returns see IntNetR0IfWait.
3972 * @param pSession The caller's session.
3973 * @param pReq The request packet.
3974 */
3975INTNETR0DECL(int) IntNetR0IfWaitReq(PSUPDRVSESSION pSession, PINTNETIFWAITREQ pReq)
3976{
3977 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
3978 return VERR_INVALID_PARAMETER;
3979 return IntNetR0IfWait(pReq->hIf, pSession, pReq->cMillies);
3980}
3981
3982
3983/**
3984 * Wake up any threads waiting on the interface.
3985 *
3986 * @returns VBox status code.
3987 * @param hIf The interface handle.
3988 * @param pSession The caller's session.
3989 * @param fNoMoreWaits When set, no more waits are permitted.
3990 */
3991INTNETR0DECL(int) IntNetR0IfAbortWait(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fNoMoreWaits)
3992{
3993 Log4(("IntNetR0IfAbortWait: hIf=%RX32 fNoMoreWaits=%RTbool\n", hIf, fNoMoreWaits));
3994
3995 /*
3996 * Get and validate essential handles.
3997 */
3998 PINTNET pIntNet = g_pIntNet;
3999 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4000 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4001
4002 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4003 if (!pIf)
4004 {
4005 Log(("IntNetR0IfAbortWait: returns VERR_INVALID_HANDLE\n"));
4006 return VERR_INVALID_HANDLE;
4007 }
4008
4009 const INTNETIFHANDLE hIfSelf = pIf->hIf;
4010 const RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4011 const bool fDestroying = ASMAtomicReadBool(&pIf->fDestroying);
4012 if ( hIfSelf != hIf /* paranoia */
4013 || hRecvEvent == NIL_RTSEMEVENT
4014 || fDestroying
4015 )
4016 {
4017 Log(("IntNetR0IfAbortWait: returns VERR_SEM_DESTROYED\n"));
4018 return VERR_SEM_DESTROYED;
4019 }
4020
4021 /*
4022 * Set fDestroying if requested to do so and then wake up all the sleeping
4023 * threads (usually just one). We leave the semaphore in the signalled
4024 * state so the next caller will return immediately.
4025 */
4026 if (fNoMoreWaits)
4027 ASMAtomicWriteBool(&pIf->fDestroying, true);
4028
4029 uint32_t cSleepers = ASMAtomicReadU32(&pIf->cSleepers) + 1;
4030 while (cSleepers-- > 0)
4031 {
4032 int rc = RTSemEventSignal(pIf->hRecvEvent);
4033 AssertRC(rc);
4034 }
4035
4036 Log4(("IntNetR0IfWait: returns %Rrc\n", VINF_SUCCESS));
4037 return VINF_SUCCESS;
4038}
4039
4040
4041/**
4042 * VMMR0 request wrapper for IntNetR0IfAbortWait.
4043 *
4044 * @returns see IntNetR0IfWait.
4045 * @param pSession The caller's session.
4046 * @param pReq The request packet.
4047 */
4048INTNETR0DECL(int) IntNetR0IfAbortWaitReq(PSUPDRVSESSION pSession, PINTNETIFABORTWAITREQ pReq)
4049{
4050 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4051 return VERR_INVALID_PARAMETER;
4052 return IntNetR0IfAbortWait(pReq->hIf, pSession, pReq->fNoMoreWaits);
4053}
4054
4055
4056/**
4057 * Close an interface.
4058 *
4059 * @returns VBox status code.
4060 * @param pIntNet The instance handle.
4061 * @param hIf The interface handle.
4062 * @param pSession The caller's session.
4063 */
4064INTNETR0DECL(int) IntNetR0IfClose(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession)
4065{
4066 LogFlow(("IntNetR0IfClose: hIf=%RX32\n", hIf));
4067
4068 /*
4069 * Validate and free the handle.
4070 */
4071 PINTNET pIntNet = g_pIntNet;
4072 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4073 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4074
4075 PINTNETIF pIf = (PINTNETIF)RTHandleTableFreeWithCtx(pIntNet->hHtIfs, hIf, pSession);
4076 if (!pIf)
4077 return VERR_INVALID_HANDLE;
4078
4079 /* Mark the handle as freed so intnetR0IfDestruct won't free it again. */
4080 ASMAtomicWriteU32(&pIf->hIf, INTNET_HANDLE_INVALID);
4081
4082 /*
4083 * Signal the event semaphore to wake up any threads in IntNetR0IfWait
4084 * and give them a moment to get out and release the interface.
4085 */
4086 uint32_t i = pIf->cSleepers;
4087 while (i-- > 0)
4088 {
4089 RTSemEventSignal(pIf->hRecvEvent);
4090 RTThreadYield();
4091 }
4092 RTSemEventSignal(pIf->hRecvEvent);
4093
4094 /*
4095 * Release the references to the interface object (handle + free lookup).
4096 */
4097 void *pvObj = pIf->pvObj;
4098 intnetR0IfRelease(pIf, pSession); /* (RTHandleTableFreeWithCtx) */
4099
4100 int rc = SUPR0ObjRelease(pvObj, pSession);
4101 LogFlow(("IntNetR0IfClose: returns %Rrc\n", rc));
4102 return rc;
4103}
4104
4105
4106/**
4107 * VMMR0 request wrapper for IntNetR0IfCloseReq.
4108 *
4109 * @returns see IntNetR0IfClose.
4110 * @param pSession The caller's session.
4111 * @param pReq The request packet.
4112 */
4113INTNETR0DECL(int) IntNetR0IfCloseReq(PSUPDRVSESSION pSession, PINTNETIFCLOSEREQ pReq)
4114{
4115 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4116 return VERR_INVALID_PARAMETER;
4117 return IntNetR0IfClose(pReq->hIf, pSession);
4118}
4119
4120
4121/**
4122 * Interface destructor callback.
4123 * This is called for reference counted objectes when the count reaches 0.
4124 *
4125 * @param pvObj The object pointer.
4126 * @param pvUser1 Pointer to the interface.
4127 * @param pvUser2 Pointer to the INTNET instance data.
4128 */
4129static DECLCALLBACK(void) intnetR0IfDestruct(void *pvObj, void *pvUser1, void *pvUser2)
4130{
4131 PINTNETIF pIf = (PINTNETIF)pvUser1;
4132 PINTNET pIntNet = (PINTNET)pvUser2;
4133 Log(("intnetR0IfDestruct: pvObj=%p pIf=%p pIntNet=%p hIf=%RX32\n", pvObj, pIf, pIntNet, pIf->hIf));
4134
4135 /*
4136 * We grab the INTNET create/open/destroy semaphore to make sure nobody is
4137 * adding or removing interface while we're in here. For paranoid reasons
4138 * we also mark the interface as destroyed here so any waiting threads can
4139 * take evasive action (theoretical case).
4140 */
4141 RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
4142 ASMAtomicWriteBool(&pIf->fDestroying, true);
4143
4144 /*
4145 * Delete the interface handle so the object no longer can be used.
4146 * (Can happen if the client didn't close its session.)
4147 */
4148 INTNETIFHANDLE hIf = ASMAtomicXchgU32(&pIf->hIf, INTNET_HANDLE_INVALID);
4149 if (hIf != INTNET_HANDLE_INVALID)
4150 {
4151 void *pvObj2 = RTHandleTableFreeWithCtx(pIntNet->hHtIfs, hIf, pIf->pSession); NOREF(pvObj2);
4152 AssertMsg(pvObj2 == pIf, ("%p, %p, hIf=%RX32 pSession=%p\n", pvObj2, pIf, hIf, pIf->pSession));
4153 }
4154
4155 /*
4156 * If we've got a network deactivate and detach ourselves from it. Because
4157 * of cleanup order we might have been orphaned by the network destructor.
4158 */
4159 PINTNETNETWORK pNetwork = pIf->pNetwork;
4160 if (pNetwork)
4161 {
4162 /* set inactive. */
4163 intnetR0NetworkSetIfActive(pNetwork, pIf, false /*fActive*/);
4164
4165 /* remove ourselves from the switch table. */
4166 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
4167 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
4168
4169 uint32_t iIf = pNetwork->MacTab.cEntries;
4170 while (iIf-- > 0)
4171 if (pNetwork->MacTab.paEntries[iIf].pIf == pIf)
4172 {
4173 if (iIf + 1 < pNetwork->MacTab.cEntries)
4174 memmove(&pNetwork->MacTab.paEntries[iIf],
4175 &pNetwork->MacTab.paEntries[iIf + 1],
4176 (pNetwork->MacTab.cEntries - iIf - 1) * sizeof(pNetwork->MacTab.paEntries[0]));
4177 pNetwork->MacTab.cEntries--;
4178 break;
4179 }
4180
4181 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
4182
4183 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
4184
4185 /* Notify the trunk about the interface being destroyed. */
4186 if (pTrunk && pTrunk->pIfPort)
4187 pTrunk->pIfPort->pfnDisconnectInterface(pTrunk->pIfPort, pIf->pvIfData);
4188
4189 /* Wait for the interface to quiesce while we still can. */
4190 intnetR0BusyWait(pNetwork, &pIf->cBusy);
4191
4192 /* Release our reference to the network. */
4193 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
4194 pIf->pNetwork = NULL;
4195 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
4196
4197 SUPR0ObjRelease(pNetwork->pvObj, pIf->pSession);
4198 }
4199
4200 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
4201
4202 /*
4203 * Wakeup anyone waiting on this interface.
4204 *
4205 * We *must* make sure they have woken up properly and realized
4206 * that the interface is no longer valid.
4207 */
4208 if (pIf->hRecvEvent != NIL_RTSEMEVENT)
4209 {
4210 RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4211 unsigned cMaxWait = 0x1000;
4212 while (pIf->cSleepers && cMaxWait-- > 0)
4213 {
4214 RTSemEventSignal(hRecvEvent);
4215 RTThreadYield();
4216 }
4217 if (pIf->cSleepers)
4218 {
4219 RTThreadSleep(1);
4220
4221 cMaxWait = pIf->cSleepers;
4222 while (pIf->cSleepers && cMaxWait-- > 0)
4223 {
4224 RTSemEventSignal(hRecvEvent);
4225 RTThreadSleep(10);
4226 }
4227 }
4228
4229 RTSemEventDestroy(hRecvEvent);
4230 pIf->hRecvEvent = NIL_RTSEMEVENT;
4231 }
4232
4233 /*
4234 * Unmap user buffer.
4235 */
4236 if (pIf->pIntBuf != pIf->pIntBufDefault)
4237 {
4238 /** @todo user buffer */
4239 }
4240
4241 /*
4242 * Unmap and Free the default buffer.
4243 */
4244 if (pIf->pIntBufDefault)
4245 {
4246 SUPR0MemFree(pIf->pSession, (RTHCUINTPTR)pIf->pIntBufDefault);
4247 pIf->pIntBufDefault = NULL;
4248 pIf->pIntBufDefaultR3 = 0;
4249 pIf->pIntBuf = NULL;
4250 pIf->pIntBufR3 = 0;
4251 }
4252
4253 /*
4254 * Free remaining resources
4255 */
4256 RTSpinlockDestroy(pIf->hRecvInSpinlock);
4257 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
4258
4259 RTMemFree(pIf->pDstTab);
4260 pIf->pDstTab = NULL;
4261
4262 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
4263 intnetR0IfAddrCacheDestroy(&pIf->aAddrCache[i]);
4264
4265 pIf->pvObj = NULL;
4266 RTMemFree(pIf);
4267}
4268
4269
4270/**
4271 * Creates a new network interface.
4272 *
4273 * The call must have opened the network for the new interface and is
4274 * responsible for closing it on failure. On success it must leave the network
4275 * opened so the interface destructor can close it.
4276 *
4277 * @returns VBox status code.
4278 * @param pNetwork The network, referenced. The reference is consumed on
4279 * success.
4280 * @param pSession The session handle.
4281 * @param cbSend The size of the send buffer.
4282 * @param cbRecv The size of the receive buffer.
4283 * @param phIf Where to store the interface handle.
4284 */
4285static int intnetR0NetworkCreateIf(PINTNETNETWORK pNetwork, PSUPDRVSESSION pSession, unsigned cbSend, unsigned cbRecv,
4286 PINTNETIFHANDLE phIf)
4287{
4288 LogFlow(("intnetR0NetworkCreateIf: pNetwork=%p pSession=%p cbSend=%u cbRecv=%u phIf=%p\n",
4289 pNetwork, pSession, cbSend, cbRecv, phIf));
4290
4291 /*
4292 * Assert input.
4293 */
4294 AssertPtr(pNetwork);
4295 AssertPtr(phIf);
4296
4297 /*
4298 * Make sure that all destination tables as well as the have space of
4299 */
4300 int rc = intnetR0NetworkEnsureTabSpace(pNetwork);
4301 if (RT_FAILURE(rc))
4302 return rc;
4303
4304 /*
4305 * Allocate the interface and initalize it.
4306 */
4307 PINTNETIF pIf = (PINTNETIF)RTMemAllocZ(sizeof(*pIf));
4308 if (!pIf)
4309 return VERR_NO_MEMORY;
4310
4311 memset(&pIf->MacAddr, 0xff, sizeof(pIf->MacAddr)); /* broadcast */
4312 //pIf->fMacSet = false;
4313 //pIf->fPromiscuous = false;
4314 //pIf->fActive = false;
4315 //pIf->fDestroying = false;
4316 //pIf->cYields = 0;
4317 //pIf->pIntBuf = 0;
4318 //pIf->pIntBufR3 = NIL_RTR3PTR;
4319 //pIf->pIntBufDefault = 0;
4320 //pIf->pIntBufDefaultR3 = NIL_RTR3PTR;
4321 pIf->hRecvEvent = NIL_RTSEMEVENT;
4322 //pIf->cSleepers = 0;
4323 pIf->hIf = INTNET_HANDLE_INVALID;
4324 pIf->pNetwork = pNetwork;
4325 pIf->pSession = pSession;
4326 //pIf->pvObj = NULL;
4327 //pIf->aAddrCache = {0};
4328 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
4329 pIf->cBusy = 0;
4330 //pIf->pDstTab = NULL;
4331 //pIf->pvIfData = NULL;
4332
4333 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End && RT_SUCCESS(rc); i++)
4334 rc = intnetR0IfAddrCacheInit(&pIf->aAddrCache[i], (INTNETADDRTYPE)i,
4335 !!(pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE));
4336 if (RT_SUCCESS(rc))
4337 rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, (PINTNETDSTTAB *)&pIf->pDstTab);
4338 if (RT_SUCCESS(rc))
4339 rc = RTSemEventCreate((PRTSEMEVENT)&pIf->hRecvEvent);
4340 if (RT_SUCCESS(rc))
4341 rc = RTSpinlockCreate(&pIf->hRecvInSpinlock);
4342 if (RT_SUCCESS(rc))
4343 {
4344 /*
4345 * Create the default buffer.
4346 */
4347 /** @todo adjust with minimums and apply defaults here. */
4348 cbRecv = RT_ALIGN(RT_MAX(cbRecv, sizeof(INTNETHDR) * 4), INTNETRINGBUF_ALIGNMENT);
4349 cbSend = RT_ALIGN(RT_MAX(cbSend, sizeof(INTNETHDR) * 4), INTNETRINGBUF_ALIGNMENT);
4350 const unsigned cbBuf = RT_ALIGN(sizeof(*pIf->pIntBuf), INTNETRINGBUF_ALIGNMENT) + cbRecv + cbSend;
4351 rc = SUPR0MemAlloc(pIf->pSession, cbBuf, (PRTR0PTR)&pIf->pIntBufDefault, (PRTR3PTR)&pIf->pIntBufDefaultR3);
4352 if (RT_SUCCESS(rc))
4353 {
4354 ASMMemZero32(pIf->pIntBufDefault, cbBuf); /** @todo I thought I specified these buggers as clearing the memory... */
4355
4356 pIf->pIntBuf = pIf->pIntBufDefault;
4357 pIf->pIntBufR3 = pIf->pIntBufDefaultR3;
4358 IntNetBufInit(pIf->pIntBuf, cbBuf, cbRecv, cbSend);
4359
4360 /*
4361 * Register the interface with the session and create a handle for it.
4362 */
4363 pIf->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_INTERNAL_NETWORK_INTERFACE,
4364 intnetR0IfDestruct, pIf, pNetwork->pIntNet);
4365 if (pIf->pvObj)
4366 {
4367 rc = RTHandleTableAllocWithCtx(pNetwork->pIntNet->hHtIfs, pIf, pSession, (uint32_t *)&pIf->hIf);
4368 if (RT_SUCCESS(rc))
4369 {
4370 /*
4371 * Finally add the interface to the network, consuming the
4372 * network reference of the caller.
4373 */
4374 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
4375 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
4376
4377 uint32_t iIf = pNetwork->MacTab.cEntries;
4378 Assert(iIf + 1 <= pNetwork->MacTab.cEntriesAllocated);
4379
4380 pNetwork->MacTab.paEntries[iIf].MacAddr = pIf->MacAddr;
4381 pNetwork->MacTab.paEntries[iIf].fActive = false;
4382 pNetwork->MacTab.paEntries[iIf].fPromiscuous = false;
4383 pNetwork->MacTab.paEntries[iIf].pIf = pIf;
4384
4385 pNetwork->MacTab.cEntries = iIf + 1;
4386 pIf->pNetwork = pNetwork;
4387
4388 /*
4389 * Grab a busy reference (paranoia) to the trunk before releaseing
4390 * the spinlock and then notify it about the new interface.
4391 */
4392 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
4393 if (pTrunk)
4394 intnetR0BusyIncTrunk(pTrunk);
4395
4396 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
4397
4398 if (pTrunk)
4399 {
4400 Log(("intnetR0NetworkCreateIf: pfnConnectInterface hIf=%RX32\n", pIf->hIf));
4401 if (pTrunk->pIfPort)
4402 rc = pTrunk->pIfPort->pfnConnectInterface(pTrunk->pIfPort, pIf, &pIf->pvIfData);
4403 intnetR0BusyDecTrunk(pTrunk);
4404 }
4405 if (RT_SUCCESS(rc))
4406 {
4407 /*
4408 * We're good!
4409 */
4410 *phIf = pIf->hIf;
4411 Log(("intnetR0NetworkCreateIf: returns VINF_SUCCESS *phIf=%RX32 cbSend=%u cbRecv=%u cbBuf=%u\n",
4412 *phIf, pIf->pIntBufDefault->cbSend, pIf->pIntBufDefault->cbRecv, pIf->pIntBufDefault->cbBuf));
4413 return VINF_SUCCESS;
4414 }
4415 }
4416
4417 SUPR0ObjRelease(pIf->pvObj, pSession);
4418 LogFlow(("intnetR0NetworkCreateIf: returns %Rrc\n", rc));
4419 return rc;
4420 }
4421
4422 /* clean up */
4423 SUPR0MemFree(pIf->pSession, (RTHCUINTPTR)pIf->pIntBufDefault);
4424 pIf->pIntBufDefault = NULL;
4425 pIf->pIntBuf = NULL;
4426 }
4427 }
4428
4429 RTSpinlockDestroy(pIf->hRecvInSpinlock);
4430 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
4431 RTSemEventDestroy(pIf->hRecvEvent);
4432 pIf->hRecvEvent = NIL_RTSEMEVENT;
4433 RTMemFree(pIf->pDstTab);
4434 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
4435 intnetR0IfAddrCacheDestroy(&pIf->aAddrCache[i]);
4436 RTMemFree(pIf);
4437 LogFlow(("intnetR0NetworkCreateIf: returns %Rrc\n", rc));
4438 return rc;
4439}
4440
4441
4442/** @copydoc INTNETTRUNKSWPORT::pfnSetSGPhys */
4443static DECLCALLBACK(bool) intnetR0TrunkIfPortSetSGPhys(PINTNETTRUNKSWPORT pSwitchPort, bool fEnable)
4444{
4445 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
4446 AssertMsgFailed(("Not implemented because it wasn't required on Darwin\n"));
4447 return ASMAtomicXchgBool(&pThis->fPhysSG, fEnable);
4448}
4449
4450
4451/** @copydoc INTNETTRUNKSWPORT::pfnReportMacAddress */
4452static DECLCALLBACK(void) intnetR0TrunkIfPortReportMacAddress(PINTNETTRUNKSWPORT pSwitchPort, PCRTMAC pMacAddr)
4453{
4454 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
4455
4456 /*
4457 * Get the network instance and grab the address spinlock before making
4458 * any changes.
4459 */
4460 intnetR0BusyIncTrunk(pThis);
4461 PINTNETNETWORK pNetwork = pThis->pNetwork;
4462 if (pNetwork)
4463 {
4464 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
4465 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
4466
4467 pNetwork->MacTab.HostMac = *pMacAddr;
4468 pThis->MacAddr = *pMacAddr;
4469
4470 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
4471 }
4472 else
4473 pThis->MacAddr = *pMacAddr;
4474 intnetR0BusyDecTrunk(pThis);
4475}
4476
4477
4478/** @copydoc INTNETTRUNKSWPORT::pfnReportPromiscuousMode */
4479static DECLCALLBACK(void) intnetR0TrunkIfPortReportPromiscuousMode(PINTNETTRUNKSWPORT pSwitchPort, bool fPromiscuous)
4480{
4481 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
4482
4483 /*
4484 * Get the network instance and grab the address spinlock before making
4485 * any changes.
4486 */
4487 intnetR0BusyIncTrunk(pThis);
4488 PINTNETNETWORK pNetwork = pThis->pNetwork;
4489 if (pNetwork)
4490 {
4491 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
4492 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
4493
4494 pNetwork->MacTab.fHostPromiscuous = fPromiscuous;
4495
4496 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
4497 }
4498 intnetR0BusyDecTrunk(pThis);
4499}
4500
4501
4502/** @copydoc INTNETTRUNKSWPORT::pfnReportGsoCapabilities */
4503static DECLCALLBACK(void) intnetR0TrunkIfPortReportGsoCapabilities(PINTNETTRUNKSWPORT pSwitchPort,
4504 uint32_t fGsoCapabilities, uint32_t fDst)
4505{
4506 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
4507
4508 for (unsigned iBit = PDMNETWORKGSOTYPE_END; iBit < 32; iBit++)
4509 Assert(!(fGsoCapabilities & RT_BIT_32(iBit)));
4510 Assert(!(fDst & ~INTNETTRUNKDIR_VALID_MASK));
4511 Assert(fDst);
4512
4513 if (fDst & INTNETTRUNKDIR_HOST)
4514 pThis->fHostGsoCapabilites = fGsoCapabilities;
4515
4516 if (fDst & INTNETTRUNKDIR_WIRE)
4517 pThis->fWireGsoCapabilites = fGsoCapabilities;
4518}
4519
4520
4521/** @copydoc INTNETTRUNKSWPORT::pfnReportNoPreemptDsts */
4522static DECLCALLBACK(void) intnetR0TrunkIfPortReportNoPreemptDsts(PINTNETTRUNKSWPORT pSwitchPort, uint32_t fNoPreemptDsts)
4523{
4524 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
4525 Assert(!(fNoPreemptDsts & ~INTNETTRUNKDIR_VALID_MASK));
4526
4527 pThis->fNoPreemptDsts = fNoPreemptDsts;
4528}
4529
4530
4531/** @copydoc INTNETTRUNKSWPORT::pfnPreRecv */
4532static DECLCALLBACK(INTNETSWDECISION) intnetR0TrunkIfPortPreRecv(PINTNETTRUNKSWPORT pSwitchPort,
4533 void const *pvSrc, size_t cbSrc, uint32_t fSrc)
4534{
4535 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
4536
4537 /* assert some sanity */
4538 AssertPtr(pvSrc);
4539 AssertReturn(cbSrc >= 6, INTNETSWDECISION_BROADCAST);
4540 Assert(fSrc);
4541
4542 /*
4543 * Mark the trunk as busy, make sure we've got a network and that there are
4544 * some active interfaces around.
4545 */
4546 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_TRUNK;
4547 intnetR0BusyIncTrunk(pThis);
4548 PINTNETNETWORK pNetwork = pThis->pNetwork;
4549 if (RT_LIKELY( pNetwork
4550 && pNetwork->cActiveIFs > 0 ))
4551 {
4552 /*
4553 * Lazy bird! No pre-switching of multicast and shared-MAC-on-wire.
4554 */
4555 PCRTNETETHERHDR pEthHdr = (PCRTNETETHERHDR)pvSrc;
4556 if (intnetR0IsMacAddrMulticast(&pEthHdr->DstMac))
4557 enmSwDecision = INTNETSWDECISION_BROADCAST;
4558 else if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
4559 enmSwDecision = INTNETSWDECISION_BROADCAST;
4560 else
4561 enmSwDecision = intnetR0NetworkPreSwitchUnicast(pNetwork,
4562 fSrc,
4563 cbSrc >= 12 ? &pEthHdr->SrcMac : NULL,
4564 &pEthHdr->DstMac);
4565 }
4566
4567 intnetR0BusyDecTrunk(pThis);
4568 return enmSwDecision;
4569}
4570
4571
4572/** @copydoc INTNETTRUNKSWPORT::pfnRecv */
4573static DECLCALLBACK(bool) intnetR0TrunkIfPortRecv(PINTNETTRUNKSWPORT pSwitchPort, void *pvIf, PINTNETSG pSG, uint32_t fSrc)
4574{
4575 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
4576
4577 /* assert some sanity */
4578 AssertPtr(pSG);
4579 Assert(fSrc);
4580 NOREF(pvIf); /* later */
4581
4582 /*
4583 * Mark the trunk as busy, make sure we've got a network and that there are
4584 * some active interfaces around.
4585 */
4586 bool fRc = false /* don't drop it */;
4587 intnetR0BusyIncTrunk(pThis);
4588 PINTNETNETWORK pNetwork = pThis->pNetwork;
4589 if (RT_LIKELY( pNetwork
4590 && pNetwork->cActiveIFs > 0 ))
4591 {
4592 /*
4593 * Grab or allocate a destination table.
4594 */
4595 bool const fIntCtx = RTThreadPreemptIsEnabled(NIL_RTTHREAD) || RTThreadIsInInterrupt(NIL_RTTHREAD);
4596 unsigned iDstTab = 0;
4597 PINTNETDSTTAB pDstTab = NULL;
4598 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
4599 RTSpinlockAcquireNoInts(pThis->hDstTabSpinlock, &Tmp);
4600 if (fIntCtx)
4601 {
4602 /* Interrupt or restricted context. */
4603 iDstTab = RTMpCpuIdToSetIndex(RTMpCpuId());
4604 iDstTab %= pThis->cIntDstTabs;
4605 pDstTab = pThis->apIntDstTabs[iDstTab];
4606 if (RT_LIKELY(pDstTab))
4607 pThis->apIntDstTabs[iDstTab] = NULL;
4608 else
4609 {
4610 iDstTab = pThis->cIntDstTabs;
4611 while (iDstTab-- > 0)
4612 {
4613 pDstTab = pThis->apIntDstTabs[iDstTab];
4614 if (pDstTab)
4615 {
4616 pThis->apIntDstTabs[iDstTab] = NULL;
4617 break;
4618 }
4619 }
4620 }
4621 RTSpinlockReleaseNoInts(pThis->hDstTabSpinlock, &Tmp);
4622 Assert(!pDstTab || iDstTab < pThis->cIntDstTabs);
4623 }
4624 else
4625 {
4626 /* Task context, fallback is to allocate a table. */
4627 AssertCompile(RT_ELEMENTS(pThis->apTaskDstTabs) == 2); /* for loop rollout */
4628 pDstTab = pThis->apIntDstTabs[iDstTab = 0];
4629 if (!pDstTab)
4630 pDstTab = pThis->apIntDstTabs[iDstTab = 1];
4631 if (pDstTab)
4632 {
4633 pThis->apIntDstTabs[iDstTab] = NULL;
4634 RTSpinlockReleaseNoInts(pThis->hDstTabSpinlock, &Tmp);
4635 Assert(iDstTab < RT_ELEMENTS(pThis->apTaskDstTabs));
4636 }
4637 else
4638 {
4639 RTSpinlockReleaseNoInts(pThis->hDstTabSpinlock, &Tmp);
4640 intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pDstTab);
4641 iDstTab = 65535;
4642 }
4643 }
4644 if (RT_LIKELY(pDstTab))
4645 {
4646 /*
4647 * Finally, get down to business of sending the frame.
4648 */
4649 INTNETSWDECISION enmSwDecision = intnetR0NetworkSend(pNetwork, NULL, fSrc, pSG, pDstTab);
4650 AssertMsg(enmSwDecision != INTNETSWDECISION_BAD_CONTEXT, ("fSrc=%#x fTrunkDst=%#x hdr=%.14Rhxs\n", fSrc, pDstTab->fTrunkDst, pSG->aSegs[0].pv));
4651 if (enmSwDecision == INTNETSWDECISION_INTNET)
4652 fRc = true; /* drop it */
4653
4654 /*
4655 * Free the destination table.
4656 */
4657 if (iDstTab == 65535)
4658 RTMemFree(pDstTab);
4659 else
4660 {
4661 RTSpinlockAcquireNoInts(pThis->hDstTabSpinlock, &Tmp);
4662 if (fIntCtx && !pThis->apIntDstTabs[iDstTab])
4663 pThis->apIntDstTabs[iDstTab] = pDstTab;
4664 else if (!fIntCtx && !pThis->apTaskDstTabs[iDstTab])
4665 pThis->apTaskDstTabs[iDstTab] = pDstTab;
4666 else
4667 {
4668 /* this shouldn't happen! */
4669 PINTNETDSTTAB *papDstTabs = fIntCtx ? &pThis->apIntDstTabs[0] : &pThis->apTaskDstTabs[0];
4670 iDstTab = fIntCtx ? pThis->cIntDstTabs : RT_ELEMENTS(pThis->apTaskDstTabs);
4671 while (iDstTab-- > 0)
4672 if (!papDstTabs[iDstTab])
4673 {
4674 papDstTabs[iDstTab] = pDstTab;
4675 break;
4676 }
4677 }
4678 RTSpinlockReleaseNoInts(pThis->hDstTabSpinlock, &Tmp);
4679 Assert(iDstTab < RT_MAX(RT_ELEMENTS(pThis->apTaskDstTabs), pThis->cIntDstTabs));
4680 }
4681 }
4682 }
4683
4684 intnetR0BusyDecTrunk(pThis);
4685 return fRc;
4686}
4687
4688
4689/** @copydoc INTNETTRUNKSWPORT::pfnSGRetain */
4690static DECLCALLBACK(void) intnetR0TrunkIfPortSGRetain(PINTNETTRUNKSWPORT pSwitchPort, PINTNETSG pSG)
4691{
4692 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
4693 PINTNETNETWORK pNetwork = pThis->pNetwork;
4694
4695 /* assert some sanity */
4696 AssertPtrReturnVoid(pNetwork);
4697 AssertReturnVoid(pNetwork->hEvtBusyIf != NIL_RTSEMEVENT);
4698 AssertPtr(pSG);
4699 Assert(pSG->cUsers > 0 && pSG->cUsers < 256);
4700
4701 /* do it. */
4702 ++pSG->cUsers;
4703}
4704
4705
4706/** @copydoc INTNETTRUNKSWPORT::pfnSGRelease */
4707static DECLCALLBACK(void) intnetR0TrunkIfPortSGRelease(PINTNETTRUNKSWPORT pSwitchPort, PINTNETSG pSG)
4708{
4709 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
4710 PINTNETNETWORK pNetwork = pThis->pNetwork;
4711
4712 /* assert some sanity */
4713 AssertPtrReturnVoid(pNetwork);
4714 AssertReturnVoid(pNetwork->hEvtBusyIf != NIL_RTSEMEVENT);
4715 AssertPtr(pSG);
4716 Assert(pSG->cUsers > 0);
4717
4718 /*
4719 * Free it?
4720 */
4721 if (!--pSG->cUsers)
4722 {
4723 /** @todo later */
4724 }
4725}
4726
4727
4728/**
4729 * Retain the trunk interface.
4730 *
4731 * @returns pThis if retained.
4732 *
4733 * @param pThis The trunk.
4734 *
4735 * @remarks Any locks.
4736 */
4737static PINTNETTRUNKIF intnetR0TrunkIfRetain(PINTNETTRUNKIF pThis)
4738{
4739 if (pThis && pThis->pIfPort)
4740 {
4741 pThis->pIfPort->pfnRetain(pThis->pIfPort);
4742 return pThis;
4743 }
4744 return NULL;
4745}
4746
4747
4748/**
4749 * Release the trunk interface.
4750 *
4751 * @param pThis The trunk.
4752 */
4753static void intnetR0TrunkIfRelease(PINTNETTRUNKIF pThis)
4754{
4755 if (pThis && pThis->pIfPort)
4756 pThis->pIfPort->pfnRelease(pThis->pIfPort);
4757}
4758
4759
4760/**
4761 * Shutdown the trunk interface.
4762 *
4763 * @param pThis The trunk.
4764 * @param pNetworks The network.
4765 *
4766 * @remarks The caller must hold the global lock.
4767 */
4768static void intnetR0TrunkIfDestroy(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork)
4769{
4770 /* assert sanity */
4771 if (!pThis)
4772 return;
4773 AssertPtr(pThis);
4774 Assert(pThis->pNetwork == pNetwork);
4775 AssertPtrNull(pThis->pIfPort);
4776
4777 /*
4778 * The interface has already been deactivated, we just to wait for
4779 * it to become idle before we can disconnect and release it.
4780 */
4781 PINTNETTRUNKIFPORT pIfPort = pThis->pIfPort;
4782 if (pIfPort)
4783 {
4784 /* unset it */
4785 pThis->pIfPort = NULL;
4786
4787 /* wait in portions so we can complain ever now an then. */
4788 uint64_t StartTS = RTTimeSystemNanoTS();
4789 int rc = pIfPort->pfnWaitForIdle(pIfPort, 10*1000);
4790 if (RT_FAILURE(rc))
4791 {
4792 LogRel(("intnet: '%s' did't become idle in %RU64 ns (%Rrc).\n",
4793 pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
4794 Assert(rc == VERR_TIMEOUT);
4795 while ( RT_FAILURE(rc)
4796 && RTTimeSystemNanoTS() - StartTS < UINT64_C(30000000000)) /* 30 sec */
4797 rc = pIfPort->pfnWaitForIdle(pIfPort, 10*1000);
4798 if (rc == VERR_TIMEOUT)
4799 {
4800 LogRel(("intnet: '%s' did't become idle in %RU64 ns (%Rrc).\n",
4801 pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
4802 while ( rc == VERR_TIMEOUT
4803 && RTTimeSystemNanoTS() - StartTS < UINT64_C(360000000000)) /* 360 sec */
4804 rc = pIfPort->pfnWaitForIdle(pIfPort, 30*1000);
4805 if (RT_FAILURE(rc))
4806 {
4807 LogRel(("intnet: '%s' did't become idle in %RU64 ns (%Rrc), giving up.\n",
4808 pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
4809 AssertRC(rc);
4810 }
4811 }
4812 }
4813
4814 /* disconnect & release it. */
4815 pIfPort->pfnDisconnectAndRelease(pIfPort);
4816 }
4817
4818 /*
4819 * Free up the resources.
4820 */
4821 pThis->pNetwork = NULL;
4822 RTSpinlockDestroy(pThis->hDstTabSpinlock);
4823 for (unsigned i = 0; i < RT_ELEMENTS(pThis->apTaskDstTabs); i++)
4824 {
4825 Assert(pThis->apTaskDstTabs[i]);
4826 RTMemFree(pThis->apTaskDstTabs[i]);
4827 pThis->apTaskDstTabs[i] = NULL;
4828 }
4829 for (unsigned i = 0; i < pThis->cIntDstTabs; i++)
4830 {
4831 Assert(pThis->apIntDstTabs[i]);
4832 RTMemFree(pThis->apIntDstTabs[i]);
4833 pThis->apIntDstTabs[i] = NULL;
4834 }
4835 RTMemFree(pThis);
4836}
4837
4838
4839/**
4840 * Creates the trunk connection (if any).
4841 *
4842 * @returns VBox status code.
4843 *
4844 * @param pNetwork The newly created network.
4845 * @param pSession The session handle.
4846 */
4847static int intnetR0NetworkCreateTrunkIf(PINTNETNETWORK pNetwork, PSUPDRVSESSION pSession)
4848{
4849 const char *pszName;
4850 switch (pNetwork->enmTrunkType)
4851 {
4852 /*
4853 * The 'None' case, simple.
4854 */
4855 case kIntNetTrunkType_None:
4856 case kIntNetTrunkType_WhateverNone:
4857 return VINF_SUCCESS;
4858
4859 /* Can't happen, but makes GCC happy. */
4860 default:
4861 return VERR_NOT_IMPLEMENTED;
4862
4863 /*
4864 * Translate enum to component factory name.
4865 */
4866 case kIntNetTrunkType_NetFlt:
4867 pszName = "VBoxNetFlt";
4868 break;
4869 case kIntNetTrunkType_NetAdp:
4870#if defined(RT_OS_DARWIN) && !defined(VBOXNETADP_DO_NOT_USE_NETFLT)
4871 pszName = "VBoxNetFlt";
4872#else /* VBOXNETADP_DO_NOT_USE_NETFLT */
4873 pszName = "VBoxNetAdp";
4874#endif /* VBOXNETADP_DO_NOT_USE_NETFLT */
4875 break;
4876 case kIntNetTrunkType_SrvNat:
4877 pszName = "VBoxSrvNat";
4878 break;
4879 }
4880
4881 /*
4882 * Allocate the trunk interface and associated destination tables.
4883 *
4884 * We take a very optimistic view on the parallelism of the host
4885 * network stack and NIC driver. So, we allocate one table for each
4886 * possible CPU to deal with interrupt time requests and one for task
4887 * time calls.
4888 */
4889 RTCPUID cCpus = RTMpGetCount(); Assert(cCpus > 0);
4890 PINTNETTRUNKIF pTrunk = (PINTNETTRUNKIF)RTMemAllocZ(RT_OFFSETOF(INTNETTRUNKIF, apIntDstTabs[cCpus]));
4891 if (!pTrunk)
4892 return VERR_NO_MEMORY;
4893
4894 Assert(pNetwork->MacTab.cEntriesAllocated > 0);
4895 int rc = VINF_SUCCESS;
4896 pTrunk->cIntDstTabs = cCpus;
4897 for (unsigned i = 0; i < cCpus && RT_SUCCESS(rc); i++)
4898 rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pTrunk->apIntDstTabs[i]);
4899 for (unsigned i = 0; i < RT_ELEMENTS(pTrunk->apTaskDstTabs) && RT_SUCCESS(rc); i++)
4900 rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pTrunk->apTaskDstTabs[i]);
4901
4902 if (RT_SUCCESS(rc))
4903 {
4904 pTrunk->SwitchPort.u32Version = INTNETTRUNKSWPORT_VERSION;
4905 pTrunk->SwitchPort.pfnPreRecv = intnetR0TrunkIfPortPreRecv;
4906 pTrunk->SwitchPort.pfnRecv = intnetR0TrunkIfPortRecv;
4907 pTrunk->SwitchPort.pfnSGRetain = intnetR0TrunkIfPortSGRetain;
4908 pTrunk->SwitchPort.pfnSGRelease = intnetR0TrunkIfPortSGRelease;
4909 pTrunk->SwitchPort.pfnSetSGPhys = intnetR0TrunkIfPortSetSGPhys;
4910 pTrunk->SwitchPort.pfnReportMacAddress = intnetR0TrunkIfPortReportMacAddress;
4911 pTrunk->SwitchPort.pfnReportPromiscuousMode = intnetR0TrunkIfPortReportPromiscuousMode;
4912 pTrunk->SwitchPort.pfnReportGsoCapabilities = intnetR0TrunkIfPortReportGsoCapabilities;
4913 pTrunk->SwitchPort.pfnReportNoPreemptDsts = intnetR0TrunkIfPortReportNoPreemptDsts;
4914 pTrunk->SwitchPort.u32VersionEnd = INTNETTRUNKSWPORT_VERSION;
4915 //pTrunk->pIfPort = NULL;
4916 pTrunk->pNetwork = pNetwork;
4917 pTrunk->MacAddr.au8[0] = 0xff;
4918 pTrunk->MacAddr.au8[1] = 0xff;
4919 pTrunk->MacAddr.au8[2] = 0xff;
4920 pTrunk->MacAddr.au8[3] = 0xff;
4921 pTrunk->MacAddr.au8[4] = 0xff;
4922 pTrunk->MacAddr.au8[5] = 0xff;
4923 //pTrunk->fPhysSG = false;
4924 //pTrunk->fUnused = false;
4925 //pTrunk->cBusy = 0;
4926 //pTrunk->fNoPreemptDsts = 0;
4927 //pTrunk->fWireGsoCapabilites = 0;
4928 //pTrunk->fHostGsoCapabilites = 0;
4929 //pTrunk->abGsoHdrs = {0};
4930 pTrunk->hDstTabSpinlock = NIL_RTSPINLOCK;
4931 //pTrunk->apTaskDstTabs = above;
4932 //pTrunk->cIntDstTabs = above;
4933 //pTrunk->apIntDstTabs = above;
4934
4935 /*
4936 * Create the lock (we've NIL'ed the members above to simplify cleanup).
4937 */
4938 rc = RTSpinlockCreate(&pTrunk->hDstTabSpinlock);
4939 if (RT_SUCCESS(rc))
4940 {
4941 /*
4942 * There are a couple of bits in MacTab as well pertaining to the
4943 * trunk. We have to set this before it's reported.
4944 *
4945 * Note! We don't need to lock the MacTab here - creation time.
4946 */
4947 pNetwork->MacTab.pTrunk = pTrunk;
4948 pNetwork->MacTab.HostMac = pTrunk->MacAddr;
4949 pNetwork->MacTab.fHostPromiscuous = false;
4950 pNetwork->MacTab.fHostActive = false;
4951 pNetwork->MacTab.fWirePromiscuous = false; /** @todo !!(fFlags & INTNET_OPEN_FLAGS_PROMISC_TRUNK_WIRE); */
4952 pNetwork->MacTab.fWireActive = false;
4953
4954#ifdef IN_RING0 /* (testcase is ring-3) */
4955 /*
4956 * Query the factory we want, then use it create and connect the trunk.
4957 */
4958 PINTNETTRUNKFACTORY pTrunkFactory = NULL;
4959 rc = SUPR0ComponentQueryFactory(pSession, pszName, INTNETTRUNKFACTORY_UUID_STR, (void **)&pTrunkFactory);
4960 if (RT_SUCCESS(rc))
4961 {
4962 rc = pTrunkFactory->pfnCreateAndConnect(pTrunkFactory,
4963 pNetwork->szTrunk,
4964 &pTrunk->SwitchPort,
4965 pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE
4966 ? INTNETTRUNKFACTORY_FLAG_NO_PROMISC
4967 : 0,
4968 &pTrunk->pIfPort);
4969 pTrunkFactory->pfnRelease(pTrunkFactory);
4970 if (RT_SUCCESS(rc))
4971 {
4972 Assert(pTrunk->pIfPort);
4973
4974 Log(("intnetR0NetworkCreateTrunkIf: VINF_SUCCESS - pszName=%s szTrunk=%s%s Network=%s\n",
4975 pszName, pNetwork->szTrunk, pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE ? " shared-mac" : "", pNetwork->szName));
4976 return VINF_SUCCESS;
4977 }
4978 }
4979#else /* IN_RING3 */
4980 rc = VERR_NOT_SUPPORTED;
4981#endif /* IN_RING3 */
4982
4983 pNetwork->MacTab.pTrunk = NULL;
4984 }
4985
4986 /* bail out and clean up. */
4987 RTSpinlockDestroy(pTrunk->hDstTabSpinlock);
4988 }
4989
4990 for (unsigned i = 0; i < RT_ELEMENTS(pTrunk->apTaskDstTabs); i++)
4991 RTMemFree(pTrunk->apTaskDstTabs[i]);
4992 for (unsigned i = 0; i < pTrunk->cIntDstTabs; i++)
4993 RTMemFree(pTrunk->apIntDstTabs[i]);
4994 RTMemFree(pTrunk);
4995
4996 LogFlow(("intnetR0NetworkCreateTrunkIf: %Rrc - pszName=%s szTrunk=%s Network=%s\n",
4997 rc, pszName, pNetwork->szTrunk, pNetwork->szName));
4998 return rc;
4999}
5000
5001
5002
5003/**
5004 * Object destructor callback.
5005 * This is called for reference counted objectes when the count reaches 0.
5006 *
5007 * @param pvObj The object pointer.
5008 * @param pvUser1 Pointer to the network.
5009 * @param pvUser2 Pointer to the INTNET instance data.
5010 */
5011static DECLCALLBACK(void) intnetR0NetworkDestruct(void *pvObj, void *pvUser1, void *pvUser2)
5012{
5013 PINTNETNETWORK pNetwork = (PINTNETNETWORK)pvUser1;
5014 PINTNET pIntNet = (PINTNET)pvUser2;
5015 Log(("intnetR0NetworkDestruct: pvObj=%p pNetwork=%p pIntNet=%p %s\n", pvObj, pNetwork, pIntNet, pNetwork->szName));
5016 Assert(pNetwork->pIntNet == pIntNet);
5017
5018 /* Take the big create/open/destroy sem. */
5019 RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
5020
5021 /*
5022 * Tell the trunk, if present, that we're about to disconnect it and wish
5023 * no further calls from it.
5024 */
5025 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
5026 if (pTrunk)
5027 pTrunk->pIfPort->pfnSetState(pTrunk->pIfPort, INTNETTRUNKIFSTATE_DISCONNECTING);
5028
5029 /*
5030 * Deactivate and orphan any remaining interfaces and wait for them to idle.
5031 *
5032 * Note! Normally there are no more interfaces at this point, however, when
5033 * supdrvCloseSession / supdrvCleanupSession release the objects the
5034 * order is undefined. So, it's quite possible that the network will
5035 * be dereference and destroyed before the interfaces.
5036 */
5037 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
5038 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
5039
5040 uint32_t iIf = pNetwork->MacTab.cEntries;
5041 while (iIf-- > 0)
5042 {
5043 pNetwork->MacTab.paEntries[iIf].fActive = false;
5044 pNetwork->MacTab.paEntries[iIf].pIf->fActive = false;
5045 }
5046
5047 pNetwork->MacTab.fHostActive = false;
5048 pNetwork->MacTab.fWireActive = false;
5049
5050 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
5051
5052 /* Wait for all the interfaces to quiesce. (Interfaces cannot be
5053 removed / added since we're holding the big lock.) */
5054 if (pTrunk)
5055 intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
5056
5057 iIf = pNetwork->MacTab.cEntries;
5058 while (iIf-- > 0)
5059 intnetR0BusyWait(pNetwork, &pNetwork->MacTab.paEntries[iIf].pIf->cBusy);
5060
5061 /* Orphan the interfaces (not trunk). Don't bother with calling
5062 pfnDisconnectInterface here since the networking is going away. */
5063 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
5064 while ((iIf = pNetwork->MacTab.cEntries) > 0)
5065 {
5066 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf - 1].pIf;
5067 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
5068
5069 intnetR0BusyWait(pNetwork, &pIf->cBusy);
5070
5071 RTSpinlockAcquireNoInts(pNetwork->hAddrSpinlock, &Tmp);
5072 if ( iIf == pNetwork->MacTab.cEntries /* paranoia */
5073 && pIf->cBusy)
5074 {
5075 pIf->pNetwork = NULL;
5076 pNetwork->MacTab.cEntries--;
5077 }
5078 }
5079
5080 /*
5081 * Zap the trunk pointer while we still own the spinlock, destroy the
5082 * trunk after we've left it. Note that this might take a while...
5083 */
5084 pNetwork->MacTab.pTrunk = NULL;
5085
5086 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock, &Tmp);
5087
5088 if (pTrunk)
5089 intnetR0TrunkIfDestroy(pTrunk, pNetwork);
5090
5091 /*
5092 * Unlink the network.
5093 * Note that it needn't be in the list if we failed during creation.
5094 */
5095 PINTNETNETWORK pPrev = pIntNet->pNetworks;
5096 if (pPrev == pNetwork)
5097 pIntNet->pNetworks = pNetwork->pNext;
5098 else
5099 {
5100 for (; pPrev; pPrev = pPrev->pNext)
5101 if (pPrev->pNext == pNetwork)
5102 {
5103 pPrev->pNext = pNetwork->pNext;
5104 break;
5105 }
5106 }
5107 pNetwork->pNext = NULL;
5108 pNetwork->pvObj = NULL;
5109
5110 /*
5111 * Free resources.
5112 */
5113 RTSemEventDestroy(pNetwork->hEvtBusyIf);
5114 pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
5115 RTSpinlockDestroy(pNetwork->hAddrSpinlock);
5116 pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
5117 RTMemFree(pNetwork->MacTab.paEntries);
5118 pNetwork->MacTab.paEntries = NULL;
5119 RTMemFree(pNetwork);
5120
5121 /* Release the create/destroy sem. */
5122 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
5123}
5124
5125
5126/**
5127 * Opens an existing network.
5128 *
5129 * The call must own the INTNET::hMtxCreateOpenDestroy.
5130 *
5131 * @returns VBox status code.
5132 * @param pIntNet The instance data.
5133 * @param pSession The current session.
5134 * @param pszNetwork The network name. This has a valid length.
5135 * @param enmTrunkType The trunk type.
5136 * @param pszTrunk The trunk name. Its meaning is specfic to the type.
5137 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
5138 * @param ppNetwork Where to store the pointer to the network on success.
5139 */
5140static int intnetR0OpenNetwork(PINTNET pIntNet, PSUPDRVSESSION pSession, const char *pszNetwork, INTNETTRUNKTYPE enmTrunkType,
5141 const char *pszTrunk, uint32_t fFlags, PINTNETNETWORK *ppNetwork)
5142{
5143 LogFlow(("intnetR0OpenNetwork: pIntNet=%p pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x ppNetwork=%p\n",
5144 pIntNet, pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, ppNetwork));
5145
5146 /* just pro forma validation, the caller is internal. */
5147 AssertPtr(pIntNet);
5148 AssertPtr(pSession);
5149 AssertPtr(pszNetwork);
5150 Assert(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End);
5151 AssertPtr(pszTrunk);
5152 Assert(!(fFlags & ~(INTNET_OPEN_FLAGS_MASK)));
5153 AssertPtr(ppNetwork);
5154 *ppNetwork = NULL;
5155
5156 /*
5157 * Search networks by name.
5158 */
5159 PINTNETNETWORK pCur;
5160 uint8_t cchName = (uint8_t)strlen(pszNetwork);
5161 Assert(cchName && cchName < sizeof(pCur->szName)); /* caller ensures this */
5162
5163 pCur = pIntNet->pNetworks;
5164 while (pCur)
5165 {
5166 if ( pCur->cchName == cchName
5167 && !memcmp(pCur->szName, pszNetwork, cchName))
5168 {
5169 /*
5170 * Found the network, now check that we have the same ideas
5171 * about the trunk setup and security.
5172 */
5173 int rc;
5174 if ( enmTrunkType == kIntNetTrunkType_WhateverNone
5175 || ( pCur->enmTrunkType == enmTrunkType
5176 && !strcmp(pCur->szTrunk, pszTrunk)))
5177 {
5178 if (!((pCur->fFlags ^ fFlags) & INTNET_OPEN_FLAGS_COMPATIBILITY_XOR_MASK))
5179 {
5180
5181 /*
5182 * Increment the reference and check that the session
5183 * can access this network.
5184 */
5185 rc = SUPR0ObjAddRef(pCur->pvObj, pSession);
5186 if (RT_SUCCESS(rc))
5187 {
5188 if (!(pCur->fFlags & INTNET_OPEN_FLAGS_PUBLIC))
5189 rc = SUPR0ObjVerifyAccess(pCur->pvObj, pSession, pCur->szName);
5190 if (RT_SUCCESS(rc))
5191 {
5192 pCur->fFlags |= fFlags & INTNET_OPEN_FLAGS_SECURITY_OR_MASK;
5193
5194 *ppNetwork = pCur;
5195 }
5196 else
5197 SUPR0ObjRelease(pCur->pvObj, pSession);
5198 }
5199 else if (rc == VERR_WRONG_ORDER)
5200 rc = VERR_NOT_FOUND; /* destruction race, pretend the other isn't there. */
5201 }
5202 else
5203 rc = VERR_INTNET_INCOMPATIBLE_FLAGS;
5204 }
5205 else
5206 rc = VERR_INTNET_INCOMPATIBLE_TRUNK;
5207
5208 LogFlow(("intnetR0OpenNetwork: returns %Rrc *ppNetwork=%p\n", rc, *ppNetwork));
5209 return rc;
5210 }
5211
5212 pCur = pCur->pNext;
5213 }
5214
5215 LogFlow(("intnetR0OpenNetwork: returns VERR_NOT_FOUND\n"));
5216 return VERR_NOT_FOUND;
5217}
5218
5219
5220/**
5221 * Creates a new network.
5222 *
5223 * The call must own the INTNET::hMtxCreateOpenDestroy and has already attempted
5224 * opening the network and found it to be non-existing.
5225 *
5226 * @returns VBox status code.
5227 * @param pIntNet The instance data.
5228 * @param pSession The session handle.
5229 * @param pszNetwork The name of the network. This must be at least one character long and no longer
5230 * than the INTNETNETWORK::szName.
5231 * @param enmTrunkType The trunk type.
5232 * @param pszTrunk The trunk name. Its meaning is specfic to the type.
5233 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
5234 * @param ppNetwork Where to store the network. In the case of failure
5235 * whatever is returned here should be dereferenced
5236 * outside the INTNET::hMtxCreateOpenDestroy.
5237 */
5238static int intnetR0CreateNetwork(PINTNET pIntNet, PSUPDRVSESSION pSession, const char *pszNetwork, INTNETTRUNKTYPE enmTrunkType,
5239 const char *pszTrunk, uint32_t fFlags, PINTNETNETWORK *ppNetwork)
5240{
5241 LogFlow(("intnetR0CreateNetwork: pIntNet=%p pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x ppNetwork=%p\n",
5242 pIntNet, pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, ppNetwork));
5243
5244 /* just pro forma validation, the caller is internal. */
5245 AssertPtr(pIntNet);
5246 AssertPtr(pSession);
5247 AssertPtr(pszNetwork);
5248 Assert(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End);
5249 AssertPtr(pszTrunk);
5250 Assert(!(fFlags & ~INTNET_OPEN_FLAGS_MASK));
5251 AssertPtr(ppNetwork);
5252 *ppNetwork = NULL;
5253
5254 /*
5255 * Allocate and initialize.
5256 */
5257 size_t cb = sizeof(INTNETNETWORK);
5258 if (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
5259 cb += INTNETNETWORK_TMP_SIZE + 64;
5260 PINTNETNETWORK pNetwork = (PINTNETNETWORK)RTMemAllocZ(cb);
5261 if (!pNetwork)
5262 return VERR_NO_MEMORY;
5263 //pNetwork->pNext = NULL;
5264 //pNetwork->pIfs = NULL;
5265 pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
5266 pNetwork->MacTab.cEntries = 0;
5267 pNetwork->MacTab.cEntriesAllocated = INTNET_GROW_DSTTAB_SIZE;
5268 pNetwork->MacTab.paEntries = NULL;
5269 pNetwork->MacTab.fHostPromiscuous = false;
5270 pNetwork->MacTab.fHostActive = false;
5271 pNetwork->MacTab.fWirePromiscuous = false;
5272 pNetwork->MacTab.fWireActive = false;
5273 pNetwork->MacTab.pTrunk = NULL;
5274 pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
5275 pNetwork->pIntNet = pIntNet;
5276 //pNetwork->pvObj = NULL;
5277 if (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
5278 pNetwork->pbTmp = RT_ALIGN_PT(pNetwork + 1, 64, uint8_t *);
5279 //else
5280 // pNetwork->pbTmp = NULL;
5281 pNetwork->fFlags = fFlags;
5282 //pNetwork->cActiveIFs = 0;
5283 size_t cchName = strlen(pszNetwork);
5284 pNetwork->cchName = (uint8_t)cchName;
5285 Assert(cchName && cchName < sizeof(pNetwork->szName)); /* caller's responsibility. */
5286 memcpy(pNetwork->szName, pszNetwork, cchName); /* '\0' at courtesy of alloc. */
5287 pNetwork->enmTrunkType = enmTrunkType;
5288 Assert(strlen(pszTrunk) < sizeof(pNetwork->szTrunk)); /* caller's responsibility. */
5289 strcpy(pNetwork->szTrunk, pszTrunk);
5290
5291 /*
5292 * Create the semaphore, spinlock and allocate the interface table.
5293 */
5294 int rc = RTSemEventCreate(&pNetwork->hEvtBusyIf);
5295 if (RT_SUCCESS(rc))
5296 rc = RTSpinlockCreate(&pNetwork->hAddrSpinlock);
5297 if (RT_SUCCESS(rc))
5298 {
5299 pNetwork->MacTab.paEntries = (PINTNETMACTABENTRY)RTMemAlloc(sizeof(INTNETMACTABENTRY) * pNetwork->MacTab.cEntriesAllocated);
5300 if (!pNetwork->MacTab.paEntries)
5301 rc = VERR_NO_MEMORY;
5302 }
5303 if (RT_SUCCESS(rc))
5304 {
5305 /*
5306 * Register the object in the current session and link it into the network list.
5307 */
5308 pNetwork->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_INTERNAL_NETWORK, intnetR0NetworkDestruct, pNetwork, pIntNet);
5309 if (pNetwork->pvObj)
5310 {
5311 pNetwork->pNext = pIntNet->pNetworks;
5312 pIntNet->pNetworks = pNetwork;
5313
5314 /*
5315 * Check if the current session is actually allowed to create and
5316 * open the network. It is possible to implement network name
5317 * based policies and these must be checked now. SUPR0ObjRegister
5318 * does no such checks.
5319 */
5320 rc = SUPR0ObjVerifyAccess(pNetwork->pvObj, pSession, pNetwork->szName);
5321 if (RT_SUCCESS(rc))
5322 {
5323 /*
5324 * Connect the trunk.
5325 */
5326 rc = intnetR0NetworkCreateTrunkIf(pNetwork, pSession);
5327 if (RT_SUCCESS(rc))
5328 {
5329 *ppNetwork = pNetwork;
5330 LogFlow(("intnetR0CreateNetwork: returns VINF_SUCCESS *ppNetwork=%p\n", pNetwork));
5331 return VINF_SUCCESS;
5332 }
5333 }
5334
5335 SUPR0ObjRelease(pNetwork->pvObj, pSession);
5336 LogFlow(("intnetR0CreateNetwork: returns %Rrc\n", rc));
5337 return rc;
5338 }
5339
5340 /* cleanup */
5341 rc = VERR_NO_MEMORY;
5342 }
5343
5344 RTSemEventDestroy(pNetwork->hEvtBusyIf);
5345 pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
5346 RTSpinlockDestroy(pNetwork->hAddrSpinlock);
5347 pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
5348 RTMemFree(pNetwork->MacTab.paEntries);
5349 pNetwork->MacTab.paEntries = NULL;
5350 RTMemFree(pNetwork);
5351
5352 LogFlow(("intnetR0CreateNetwork: returns %Rrc\n", rc));
5353 return rc;
5354}
5355
5356
5357/**
5358 * Opens a network interface and connects it to the specified network.
5359 *
5360 * @returns VBox status code.
5361 * @param pSession The session handle.
5362 * @param pszNetwork The network name.
5363 * @param enmTrunkType The trunk type.
5364 * @param pszTrunk The trunk name. Its meaning is specfic to the type.
5365 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
5366 * @param fRestrictAccess Whether new participants should be subjected to access check or not.
5367 * @param cbSend The send buffer size.
5368 * @param cbRecv The receive buffer size.
5369 * @param phIf Where to store the handle to the network interface.
5370 */
5371INTNETR0DECL(int) IntNetR0Open(PSUPDRVSESSION pSession, const char *pszNetwork,
5372 INTNETTRUNKTYPE enmTrunkType, const char *pszTrunk, uint32_t fFlags,
5373 uint32_t cbSend, uint32_t cbRecv, PINTNETIFHANDLE phIf)
5374{
5375 LogFlow(("IntNetR0Open: pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x cbSend=%u cbRecv=%u phIf=%p\n",
5376 pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, cbSend, cbRecv, phIf));
5377
5378 /*
5379 * Validate input.
5380 */
5381 PINTNET pIntNet = g_pIntNet;
5382 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
5383 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
5384
5385 AssertPtrReturn(pszNetwork, VERR_INVALID_PARAMETER);
5386 const char *pszNetworkEnd = RTStrEnd(pszNetwork, INTNET_MAX_NETWORK_NAME);
5387 AssertReturn(pszNetworkEnd, VERR_INVALID_PARAMETER);
5388 size_t cchNetwork = pszNetworkEnd - pszNetwork;
5389 AssertReturn(cchNetwork, VERR_INVALID_PARAMETER);
5390
5391 if (pszTrunk)
5392 {
5393 AssertPtrReturn(pszTrunk, VERR_INVALID_PARAMETER);
5394 const char *pszTrunkEnd = RTStrEnd(pszTrunk, INTNET_MAX_TRUNK_NAME);
5395 AssertReturn(pszTrunkEnd, VERR_INVALID_PARAMETER);
5396 }
5397 else
5398 pszTrunk = "";
5399
5400 AssertMsgReturn(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End,
5401 ("%d\n", enmTrunkType), VERR_INVALID_PARAMETER);
5402 switch (enmTrunkType)
5403 {
5404 case kIntNetTrunkType_None:
5405 case kIntNetTrunkType_WhateverNone:
5406 if (*pszTrunk)
5407 return VERR_INVALID_PARAMETER;
5408 break;
5409
5410 case kIntNetTrunkType_NetFlt:
5411 case kIntNetTrunkType_NetAdp:
5412 if (!*pszTrunk)
5413 return VERR_INVALID_PARAMETER;
5414 break;
5415
5416 default:
5417 return VERR_NOT_IMPLEMENTED;
5418 }
5419
5420 AssertMsgReturn(!(fFlags & ~INTNET_OPEN_FLAGS_MASK), ("%#x\n", fFlags), VERR_INVALID_PARAMETER);
5421 AssertPtrReturn(phIf, VERR_INVALID_PARAMETER);
5422
5423 /*
5424 * Acquire the mutex to serialize open/create/close.
5425 */
5426 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
5427 if (RT_FAILURE(rc))
5428 return rc;
5429
5430 /*
5431 * Try open / create the network and create an interface on it for the
5432 * caller to use.
5433 */
5434 PINTNETNETWORK pNetwork = NULL;
5435 rc = intnetR0OpenNetwork(pIntNet, pSession, pszNetwork, enmTrunkType, pszTrunk, fFlags, &pNetwork);
5436 if (RT_SUCCESS(rc))
5437 {
5438 rc = intnetR0NetworkCreateIf(pNetwork, pSession, cbSend, cbRecv, phIf);
5439 if (RT_SUCCESS(rc))
5440 rc = VINF_ALREADY_INITIALIZED;
5441 else
5442 SUPR0ObjRelease(pNetwork->pvObj, pSession);
5443 }
5444 else if (rc == VERR_NOT_FOUND)
5445 {
5446 rc = intnetR0CreateNetwork(pIntNet, pSession, pszNetwork, enmTrunkType, pszTrunk, fFlags, &pNetwork);
5447 if (RT_SUCCESS(rc))
5448 {
5449 rc = intnetR0NetworkCreateIf(pNetwork, pSession, cbSend, cbRecv, phIf);
5450 if (RT_FAILURE(rc))
5451 SUPR0ObjRelease(pNetwork->pvObj, pSession);
5452 }
5453 }
5454
5455 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
5456 LogFlow(("IntNetR0Open: return %Rrc *phIf=%RX32\n", rc, *phIf));
5457 return rc;
5458}
5459
5460
5461/**
5462 * VMMR0 request wrapper for IntNetR0Open.
5463 *
5464 * @returns see GMMR0MapUnmapChunk.
5465 * @param pSession The caller's session.
5466 * @param pReq The request packet.
5467 */
5468INTNETR0DECL(int) IntNetR0OpenReq(PSUPDRVSESSION pSession, PINTNETOPENREQ pReq)
5469{
5470 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
5471 return VERR_INVALID_PARAMETER;
5472 return IntNetR0Open(pSession, &pReq->szNetwork[0], pReq->enmTrunkType, pReq->szTrunk,
5473 pReq->fFlags, pReq->cbSend, pReq->cbRecv, &pReq->hIf);
5474}
5475
5476
5477/**
5478 * Count the internal networks.
5479 *
5480 * This is mainly for providing the testcase with some introspection to validate
5481 * behavior when closing interfaces.
5482 *
5483 * @returns The number of networks.
5484 */
5485INTNETR0DECL(uint32_t) IntNetR0GetNetworkCount(void)
5486{
5487 /*
5488 * Grab the instance.
5489 */
5490 PINTNET pIntNet = g_pIntNet;
5491 if (!pIntNet)
5492 return 0;
5493 AssertPtrReturn(pIntNet, 0);
5494 AssertReturn(pIntNet->u32Magic == INTNET_MAGIC, 0);
5495
5496 /*
5497 * Grab the mutex and count the networks.
5498 */
5499 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
5500 if (RT_FAILURE(rc))
5501 return 0;
5502
5503 uint32_t cNetworks = 0;
5504 for (PINTNETNETWORK pCur = pIntNet->pNetworks; pCur; pCur = pCur->pNext)
5505 cNetworks++;
5506
5507 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
5508
5509 return cNetworks;
5510}
5511
5512
5513
5514/**
5515 * Destroys an instance of the Ring-0 internal networking service.
5516 */
5517INTNETR0DECL(void) IntNetR0Term(void)
5518{
5519 LogFlow(("IntNetR0Term:\n"));
5520
5521 /*
5522 * Zap the global pointer and validate it.
5523 */
5524 PINTNET pIntNet = g_pIntNet;
5525 g_pIntNet = NULL;
5526 if (!pIntNet)
5527 return;
5528 AssertPtrReturnVoid(pIntNet);
5529 AssertReturnVoid(pIntNet->u32Magic == INTNET_MAGIC);
5530
5531 /*
5532 * There is not supposed to be any networks hanging around at this time.
5533 */
5534 AssertReturnVoid(ASMAtomicCmpXchgU32(&pIntNet->u32Magic, ~INTNET_MAGIC, INTNET_MAGIC));
5535 Assert(pIntNet->pNetworks == NULL);
5536 if (pIntNet->hMtxCreateOpenDestroy != NIL_RTSEMMUTEX)
5537 {
5538 RTSemMutexDestroy(pIntNet->hMtxCreateOpenDestroy);
5539 pIntNet->hMtxCreateOpenDestroy = NIL_RTSEMMUTEX;
5540 }
5541 if (pIntNet->hHtIfs != NIL_RTHANDLETABLE)
5542 {
5543 /** @todo does it make sense to have a deleter here? */
5544 RTHandleTableDestroy(pIntNet->hHtIfs, NULL, NULL);
5545 pIntNet->hHtIfs = NIL_RTHANDLETABLE;
5546 }
5547
5548 RTMemFree(pIntNet);
5549}
5550
5551
5552/**
5553 * Initalizes the internal network ring-0 service.
5554 *
5555 * @returns VBox status code.
5556 */
5557INTNETR0DECL(int) IntNetR0Init(void)
5558{
5559 LogFlow(("IntNetR0Init:\n"));
5560 int rc = VERR_NO_MEMORY;
5561 PINTNET pIntNet = (PINTNET)RTMemAllocZ(sizeof(*pIntNet));
5562 if (pIntNet)
5563 {
5564 //pIntNet->pNetworks = NULL;
5565
5566 rc = RTSemMutexCreate(&pIntNet->hMtxCreateOpenDestroy);
5567 if (RT_SUCCESS(rc))
5568 {
5569 rc = RTHandleTableCreateEx(&pIntNet->hHtIfs, RTHANDLETABLE_FLAGS_LOCKED | RTHANDLETABLE_FLAGS_CONTEXT,
5570 UINT32_C(0x8ffe0000), 4096, intnetR0IfRetainHandle, NULL);
5571 if (RT_SUCCESS(rc))
5572 {
5573 pIntNet->u32Magic = INTNET_MAGIC;
5574 g_pIntNet = pIntNet;
5575 LogFlow(("IntNetR0Init: returns VINF_SUCCESS pIntNet=%p\n", pIntNet));
5576 return VINF_SUCCESS;
5577 }
5578
5579 RTSemMutexDestroy(pIntNet->hMtxCreateOpenDestroy);
5580 }
5581 RTMemFree(pIntNet);
5582 }
5583 LogFlow(("IntNetR0Init: returns %Rrc\n", rc));
5584 return rc;
5585}
5586
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
ContactPrivacy policyTerms of Use