Index: /trunk/src/VBox/Devices/Network/DevE1000.cpp
===================================================================
--- /trunk/src/VBox/Devices/Network/DevE1000.cpp	(revision 41045)
+++ /trunk/src/VBox/Devices/Network/DevE1000.cpp	(revision 41046)
@@ -81,4 +81,8 @@
  */
 //#define E1K_WITH_MSI
+/*
+ * E1K_WITH_TX_CS protects e1kXmitPending with a critical section.
+ */
+#define E1K_WITH_TX_CS 1
 /*
  * E1K_WITH_TXD_CACHE causes E1000 to fetch multiple TX descriptors in a
@@ -1010,5 +1014,7 @@
     PDMCRITSECT cs;                  /**< Critical section - what is it protecting? */
     PDMCRITSECT csRx;                                     /**< RX Critical section. */
-//    PDMCRITSECT csTx;                                     /**< TX Critical section. */
+#ifdef E1K_WITH_TX_CS
+    PDMCRITSECT csTx;                                     /**< TX Critical section. */
+#endif /* E1K_WITH_TX_CS */
     /** Base address of memory-mapped registers. */
     RTGCPHYS    addrMMReg;
@@ -1488,8 +1494,11 @@
 #define e1kCsRxLeave(ps) PDMCritSectLeave(&ps->csRx)
 
+#ifndef E1K_WITH_TX_CS
 #define e1kCsTxEnter(ps, rc) VINF_SUCCESS
 #define e1kCsTxLeave(ps) do { } while (0)
-//# define e1kCsTxEnter(ps, rc) PDMCritSectEnter(&ps->csTx, rc)
-//# define e1kCsTxLeave(ps) PDMCritSectLeave(&ps->csTx)
+#else /* E1K_WITH_TX_CS */
+# define e1kCsTxEnter(ps, rc) PDMCritSectEnter(&ps->csTx, rc)
+# define e1kCsTxLeave(ps) PDMCritSectLeave(&ps->csTx)
+#endif /* E1K_WITH_TX_CS */
 
 #ifdef IN_RING3
@@ -3224,20 +3233,24 @@
 DECLINLINE(unsigned) e1kTxDLoadMore(E1KSTATE* pState)
 {
-    unsigned nDescsAvailable = e1kGetTxLen(pState);
-    unsigned nDescsToFetch = RT_MIN(nDescsAvailable, E1K_TXD_CACHE_SIZE - pState->nTxDFetched);
-    /*
-     * It is safe to use TDLEN and TDH in the following expression since TDLEN
-     * is set during init and never changes after that, and TDH is advanced in
-     * the loop we are being called from.
-     */
-    unsigned nDescsInSingleRead = RT_MIN(nDescsToFetch, TDLEN / sizeof(E1KTXDESC) - TDH);
+    /* nTxDFetched descriptors past TDH are already cached; a ring of zero descriptors fetches nothing. */
+    unsigned nDescsTotal        = TDLEN / sizeof(E1KTXDESC);
+    unsigned nDescsAvailable    = nDescsTotal ? e1kGetTxLen(pState) - pState->nTxDFetched : 0;
+    unsigned nDescsToFetch      = RT_MIN(nDescsAvailable, E1K_TXD_CACHE_SIZE - pState->nTxDFetched);
+    unsigned nFirstNotLoaded    = nDescsTotal ? (TDH + pState->nTxDFetched) % nDescsTotal : 0; /* never modulo by zero */
+    unsigned nDescsInSingleRead = RT_MIN(nDescsToFetch, nDescsTotal - nFirstNotLoaded);
+    E1kLog3(("%s e1kTxDLoadMore: nDescsAvailable=%u nDescsToFetch=%u "
+             "nDescsTotal=%u nFirstNotLoaded=0x%x nDescsInSingleRead=%u\n",
+             INSTANCE(pState), nDescsAvailable, nDescsToFetch, nDescsTotal,
+             nFirstNotLoaded, nDescsInSingleRead));
     if (nDescsToFetch == 0)
         return 0;
     E1KTXDESC* pFirstEmptyDesc = &pState->aTxDescriptors[pState->nTxDFetched];
     PDMDevHlpPhysRead(pState->CTX_SUFF(pDevIns),
-                      ((uint64_t)TDBAH << 32) + TDBAL + TDH * sizeof(E1KTXDESC),
+                      ((uint64_t)TDBAH << 32) + TDBAL + nFirstNotLoaded * sizeof(E1KTXDESC),
                       pFirstEmptyDesc, nDescsInSingleRead * sizeof(E1KTXDESC));
-    E1kLog3(("%s Fetched %u TX descriptors at %08x%08x, TDLEN=%08x, TDH=%08x, TDT=%08x\n",
-             INSTANCE(pState), nDescsInSingleRead, TDBAH, TDBAL + TDH * sizeof(E1KTXDESC), TDLEN, TDH, TDT));
+    E1kLog3(("%s Fetched %u TX descriptors at %08x%08x(0x%x), TDLEN=%08x, TDH=%08x, TDT=%08x\n",
+             INSTANCE(pState), nDescsInSingleRead,
+             TDBAH, TDBAL + TDH * sizeof(E1KTXDESC),
+             nFirstNotLoaded, TDLEN, TDH, TDT));
     if (nDescsToFetch > nDescsInSingleRead)
     {
@@ -3246,7 +3259,7 @@
                           pFirstEmptyDesc + nDescsInSingleRead,
                           (nDescsToFetch - nDescsInSingleRead) * sizeof(E1KTXDESC));
-        E1kLog3(("%s Fetched %u TX descriptors at %08x%08x, TDLEN=%08x, TDH=%08x, TDT=%08x\n",
+        E1kLog3(("%s Fetched %u TX descriptors at %08x%08x\n",
                  INSTANCE(pState), nDescsToFetch - nDescsInSingleRead,
-                 TDBAH, TDBAL, TDLEN, TDH, TDT));
+                 TDBAH, TDBAL));
     }
     pState->nTxDFetched += nDescsToFetch;
@@ -4513,4 +4526,7 @@
     int rc = VINF_SUCCESS;
 
+    /* Check if transmitter is enabled. */
+    if (!(TCTL & TCTL_EN))
+        return VINF_SUCCESS;
     /*
      * Grab the xmit lock of the driver as well as the E1K device state.
@@ -4523,5 +4539,5 @@
             return rc;
     }
-    //rc = e1kCsTxEnter(pState, VERR_SEM_BUSY);
+    rc = e1kCsTxEnter(pState, VERR_SEM_BUSY);
     if (RT_LIKELY(rc == VINF_SUCCESS))
     {
@@ -4556,5 +4572,5 @@
         /// @todo: uncomment: pState->uStatIntTXQE++;
         /// @todo: uncomment: e1kRaiseInterrupt(pState, ICR_TXQE);
-        //e1kCsTxLeave(pState);
+        e1kCsTxLeave(pState);
     }
 
@@ -4585,4 +4601,7 @@
     int rc = VINF_SUCCESS;
 
+    /* Check if transmitter is enabled. */
+    if (!(TCTL & TCTL_EN))
+        return VINF_SUCCESS;
     /*
      * Grab the xmit lock of the driver as well as the E1K device state.
@@ -4600,64 +4619,93 @@
      * Note! Do not process descriptors in locked state
      */
-    STAM_PROFILE_ADV_START(&pState->CTX_SUFF_Z(StatTransmit), a);
-    while (!pState->fLocked && e1kTxDLazyLoad(pState))
-    {
-        while (e1kLocateTxPacket(pState))
-        {
-            /* Found a complete packet, allocate it. */
-            rc = e1kXmitAllocBuf(pState, pState->fGSO);
-            /* If we're out of bandwidth we'll come back later. */
-            if (RT_FAILURE(rc))
+    rc = e1kCsTxEnter(pState, VERR_SEM_BUSY);
+    if (RT_LIKELY(rc == VINF_SUCCESS))
+    {
+        STAM_PROFILE_ADV_START(&pState->CTX_SUFF_Z(StatTransmit), a);
+        /*
+         * fIncomplete is set whenever we try to fetch additional descriptors
+         * for an incomplete packet. If we fail to locate a complete packet on
+         * the next iteration we need to reset the cache or we risk getting
+         * stuck in this loop forever.
+         */
+        bool fIncomplete = false;
+        while (!pState->fLocked && e1kTxDLazyLoad(pState))
+        {
+            while (e1kLocateTxPacket(pState))
+            {
+                fIncomplete = false;
+                /* Found a complete packet, allocate it. */
+                rc = e1kXmitAllocBuf(pState, pState->fGSO);
+                /* If we're out of bandwidth we'll come back later. */
+                if (RT_FAILURE(rc))
+                    goto out;
+                /* Copy the packet to allocated buffer and send it. */
+                rc = e1kXmitPacket(pState, fOnWorkerThread);
+                /* If we're out of bandwidth we'll come back later. */
+                if (RT_FAILURE(rc))
+                    goto out;
+            }
+            uint8_t u8Remain = pState->nTxDFetched - pState->iTxDCurrent;
+            if (RT_UNLIKELY(fIncomplete))
+            {
+                /*
+                 * The descriptor cache is full, but we were unable to find
+                 * a complete packet in it. Drop the cache and hope that
+                 * the guest driver can recover from network card error.
+                 */
+                LogRel(("%s No complete packets in%s TxD cache! "
+                      "Fetched=%d, current=%d, TX len=%d.\n",
+                      INSTANCE(pState),
+                      u8Remain == E1K_TXD_CACHE_SIZE ? " full" : "",
+                      pState->nTxDFetched, pState->iTxDCurrent,
+                      e1kGetTxLen(pState)));
+                Log4(("%s No complete packets in%s TxD cache! "
+                      "Fetched=%d, current=%d, TX len=%d. Dump follows:\n",
+                      INSTANCE(pState),
+                      u8Remain == E1K_TXD_CACHE_SIZE ? " full" : "",
+                      pState->nTxDFetched, pState->iTxDCurrent,
+                      e1kGetTxLen(pState)));
+                e1kDumpTxDCache(pState);
+                pState->iTxDCurrent = pState->nTxDFetched = 0;
+                rc = VERR_NET_IO_ERROR;
                 goto out;
-            /* Copy the packet to allocated buffer and send it. */
-            rc = e1kXmitPacket(pState, fOnWorkerThread);
-            /* If we're out of bandwidth we'll come back later. */
-            if (RT_FAILURE(rc))
-                goto out;
-        }
-        uint8_t u8Remain = pState->nTxDFetched - pState->iTxDCurrent;
-        if (RT_UNLIKELY(u8Remain == E1K_TXD_CACHE_SIZE))
-        {
-            /*
-             * The descriptor cache is full, but we were unable to find
-             * a complete packet in it. Drop the cache and hope that
-             * the guest driver can recover from network card error.
-             */
-            Log4(("%s No complete packets in full TxD cache! "
-                  "Fetched=%d, TX len=%d. Dump follows:\n",
-                  INSTANCE(pState), pState->nTxDFetched, e1kGetTxLen(pState)));
-            e1kDumpTxDCache(pState);
-            pState->nTxDFetched = 0;
-            rc = VERR_NET_IO_ERROR;
-            goto out;
-        }
-        if (u8Remain > 0)
-        {
-            /*
-             * A packet was partially fetched. Move incomplete packet to
-             * the beginning of cache buffer, then load more descriptors.
-             */
-            memmove(pState->aTxDescriptors,
-                    &pState->aTxDescriptors[pState->iTxDCurrent],
-                    u8Remain * sizeof(E1KTXDESC));
-            pState->nTxDFetched = u8Remain;
-            e1kTxDLoadMore(pState);
-        }
-        else
-            pState->nTxDFetched = 0;
-        pState->iTxDCurrent = 0;
-    }
-    if (!pState->fLocked && GET_BITS(TXDCTL, LWTHRESH) == 0)
-    {
-        E1kLog2(("%s Out of transmit descriptors, raise ICR.TXD_LOW\n",
-                 INSTANCE(pState)));
-        e1kRaiseInterrupt(pState, VERR_SEM_BUSY, ICR_TXD_LOW);
-    }
-
+            }
+            if (u8Remain > 0)
+            {
+                Log4(("%s Incomplete packet at %d. Already fetched %d, "
+                      "%d more are available\n",
+                      INSTANCE(pState), pState->iTxDCurrent, u8Remain,
+                      e1kGetTxLen(pState) - u8Remain));
+                      
+                /*
+                 * A packet was partially fetched. Move incomplete packet to
+                 * the beginning of cache buffer, then load more descriptors.
+                 */
+                memmove(pState->aTxDescriptors,
+                        &pState->aTxDescriptors[pState->iTxDCurrent],
+                        u8Remain * sizeof(E1KTXDESC));
+                pState->nTxDFetched = u8Remain;
+                e1kTxDLoadMore(pState);
+                fIncomplete = true;
+            }
+            else
+                pState->nTxDFetched = 0;
+            pState->iTxDCurrent = 0;
+        }
+        if (!pState->fLocked && GET_BITS(TXDCTL, LWTHRESH) == 0)
+        {
+            E1kLog2(("%s Out of transmit descriptors, raise ICR.TXD_LOW\n",
+                     INSTANCE(pState)));
+            e1kRaiseInterrupt(pState, VERR_SEM_BUSY, ICR_TXD_LOW);
+        }
 out:
-    STAM_PROFILE_ADV_STOP(&pState->CTX_SUFF_Z(StatTransmit), a);
-
-    /// @todo: uncomment: pState->uStatIntTXQE++;
-    /// @todo: uncomment: e1kRaiseInterrupt(pState, ICR_TXQE);
+        STAM_PROFILE_ADV_STOP(&pState->CTX_SUFF_Z(StatTransmit), a);
+
+        /// @todo: uncomment: pState->uStatIntTXQE++;
+        /// @todo: uncomment: e1kRaiseInterrupt(pState, ICR_TXQE);
+
+        e1kCsTxLeave(pState);
+    }
+
 
     /*
@@ -4727,8 +4775,5 @@
 static int e1kRegWriteTDT(E1KSTATE* pState, uint32_t offset, uint32_t index, uint32_t value)
 {
-    int rc = e1kCsTxEnter(pState, VINF_IOM_R3_MMIO_WRITE);
-    if (RT_UNLIKELY(rc != VINF_SUCCESS))
-        return rc;
-    rc = e1kRegWriteDefault(pState, offset, index, value);
+    int rc = e1kRegWriteDefault(pState, offset, index, value);
 
     /* All descriptors starting with head and not including tail belong to us. */
@@ -4743,5 +4788,4 @@
         E1kLog(("%s e1kRegWriteTDT: %d descriptors to process\n",
                  INSTANCE(pState), e1kGetTxLen(pState)));
-        e1kCsTxLeave(pState);
 
         /* Transmit pending packets if possible, defer it if we cannot do it
@@ -4760,9 +4804,9 @@
             if (rc == VERR_TRY_AGAIN)
                 rc = VINF_SUCCESS;
+            else if (rc == VERR_SEM_BUSY) /* A lock was busy: defer the TDT write. */
+                rc = VINF_IOM_R3_MMIO_WRITE;
             AssertRC(rc);
         }
     }
-    else
-        e1kCsTxLeave(pState);
 
     return rc;
@@ -6298,8 +6342,13 @@
     pState->u64AckedAt   = 0;
 #ifdef E1K_WITH_TXD_CACHE
-    pState->nTxDFetched  = 0;
-    pState->iTxDCurrent  = 0;
-    pState->fGSO         = false;
-    pState->cbTxAlloc    = 0;
+    int rc = e1kCsTxEnter(pState, VERR_SEM_BUSY);
+    if (RT_LIKELY(rc == VINF_SUCCESS))
+    {
+        pState->nTxDFetched  = 0;
+        pState->iTxDCurrent  = 0;
+        pState->fGSO         = false;
+        pState->cbTxAlloc    = 0;
+        e1kCsTxLeave(pState);
+    }
 #endif /* E1K_WITH_TXD_CACHE */
     e1kHardReset(pState);
@@ -6375,6 +6424,8 @@
             pState->hEventMoreRxDescAvail = NIL_RTSEMEVENT;
         }
+#ifdef E1K_WITH_TX_CS
+        PDMR3CritSectDelete(&pState->csTx);
+#endif /* E1K_WITH_TX_CS */
         PDMR3CritSectDelete(&pState->csRx);
-        //PDMR3CritSectDelete(&pState->csTx);
         PDMR3CritSectDelete(&pState->cs);
     }
@@ -6721,4 +6772,9 @@
     if (RT_FAILURE(rc))
         return rc;
+#ifdef E1K_WITH_TX_CS
+    rc = PDMDevHlpCritSectInit(pDevIns, &pState->csTx, RT_SRC_POS, "%sTX", pState->szInstance);
+    if (RT_FAILURE(rc))
+        return rc;
+#endif /* E1K_WITH_TX_CS */
 
     /* Set PCI config registers */
