VirtualBox

Changeset 104987 in vbox


Ignore:
Timestamp:
Jun 20, 2024 3:22:24 PM (3 months ago)
Author:
vboxsync
Message:

VMM/IEM: Make the regular access-with-in-page-check 1-2 instructions shorter for well aligned data while putting the misaligned variant out of the straight code stream. This means larger TBs, but seems to be slightly faster. bugref:10687

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerTlbLookup.h

    r104984 r104987  
    391391
    392392    /*
     393     * Snippet for checking whether misaligned accesses are within the
     394     * page (see step 2).
     395     *
     396     * This sequence is 1 instruction longer than the strict alignment test,
     397     * and since most accesses are correctly aligned it is better to do it
      398     * this way.  Runs of r163597 seem to indicate there was a regression
      399     * when placing this code in the main code flow.
     400     */
     401    uint8_t const idxRegFlatPtr = iSegReg != UINT8_MAX || pTlbState->idxRegPtr == UINT8_MAX || offDisp != 0
     402                                ? idxRegMemResult : pTlbState->idxRegPtr; /* (not immediately ready for tlblookup use) */
     403    uint8_t const fAlignMask    = a_fDataTlb ? (uint8_t)fAlignMaskAndCtl : 0;
     404    if (a_fDataTlb)
     405    {
     406        Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_SSE | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_GP_OR_AC)));
     407        Assert(RT_IS_POWER_OF_TWO(fAlignMask + 1U));
     408        Assert(cbMem == fAlignMask + 1U || !(fAccess & IEM_ACCESS_ATOMIC));
     409        Assert(cbMem < 128); /* alignment test assumptions */
     410    }
     411
     412    uint32_t offMisalignedAccess             = UINT32_MAX;
     413    uint32_t offFixupMisalignedAccessJmpBack = UINT32_MAX;
     414    if (   a_fDataTlb
     415        && !(fAlignMaskAndCtl & ~UINT32_C(0xff))
     416        && !(fAccess & IEM_ACCESS_ATOMIC)
     417        && cbMem > 1
     418        && RT_IS_POWER_OF_TWO(cbMem)
     419        && !(pReNative->fExec & IEM_F_X86_AC))
     420    {
     421        /* tlbmisaligned: */
     422        offMisalignedAccess = off;
     423        /* reg1 = regflat & 0xfff */
     424        off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1,/*=*/ idxRegFlatPtr,/*&*/ GUEST_PAGE_OFFSET_MASK);
     425        /* cmp reg1, GUEST_PAGE_SIZE - cbMem */
     426        off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, GUEST_PAGE_SIZE - cbMem);
     427        /* jbe short jmpback */
     428        offFixupMisalignedAccessJmpBack = off;
     429        off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 100, kIemNativeInstrCond_be);
     430#ifdef IEM_WITH_TLB_STATISTICS
     431        off = iemNativeEmitIncU32CounterInVCpuEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
     432                                                 offVCpuTlb + RT_UOFFSETOF(IEMTLB, cTlbNativeMissCrossPage));
     433#endif
     434        off = iemNativeEmitJmpToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss);
     435    }
     436
     437    /*
    393438     * tlblookup:
    394439     */
     
    515560           this step is required or if the address is a constant (simplicity) or
    516561           if offDisp is non-zero. */
    517     uint8_t const idxRegFlatPtr = iSegReg != UINT8_MAX || pTlbState->idxRegPtr == UINT8_MAX || offDisp != 0
    518                                 ? idxRegMemResult : pTlbState->idxRegPtr;
    519562    if (iSegReg != UINT8_MAX)
    520563    {
     
    575618    if (a_fDataTlb)
    576619    {
    577         uint8_t const fAlignMask = (uint8_t)fAlignMaskAndCtl;
    578         Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_SSE | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_GP_OR_AC)));
    579         Assert(RT_IS_POWER_OF_TWO(fAlignMask + 1U));
    580         Assert(cbMem == fAlignMask + 1U || !(fAccess & IEM_ACCESS_ATOMIC));
    581         Assert(cbMem < 128); /* alignment test assumptions */
    582 
    583         /*
    584          * 2a. Strict alignment check using fAlignMask for atomic, strictly
    585          *     aligned stuff (SSE & AVX) and AC=1 (ring-3).
    586          */
    587         bool const fStrictAlignmentCheck = fAlignMask
    588                                         && (   (fAlignMaskAndCtl & ~UINT32_C(0xff))
    589                                             || (fAccess & IEM_ACCESS_ATOMIC)
    590                                             || (pReNative->fExec & IEM_F_X86_AC) );
    591         if (fStrictAlignmentCheck)
    592         {
    593             /* test regflat, fAlignMask */
    594             off = iemNativeEmitTestAnyBitsInGpr8Ex(pCodeBuf, off, idxRegFlatPtr, fAlignMask);
     620        if (offMisalignedAccess != UINT32_MAX)
     621        {
     622#ifdef RT_ARCH_ARM64
     623            if (cbMem == 2)
     624            {
     625                /* tbnz regflatptr, #0, tlbmiss */
     626                pCodeBuf[off++] = Armv8A64MkInstrTbnz((int32_t)offMisalignedAccess - (int32_t)off, idxRegFlatPtr, 0);
     627            }
     628            else
     629#endif
     630            {
     631                /* test regflat, fAlignMask */
     632                off = iemNativeEmitTestAnyBitsInGpr8Ex(pCodeBuf, off, idxRegFlatPtr, cbMem - 1);
     633                /* jnz tlbmiss */
     634                off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offMisalignedAccess, kIemNativeInstrCond_ne);
     635            }
     636            /** @todo ARM64: two byte access checks can be reduced to a single instruction */
     637            iemNativeFixupFixedJump(pReNative, offFixupMisalignedAccessJmpBack, off);
     638        }
     639        else
     640        {
     641            /*
     642             * 2a. Strict alignment check using fAlignMask for atomic, strictly
     643             *     aligned stuff (SSE & AVX) and AC=1 (ring-3).
     644             */
     645            bool const fStrictAlignmentCheck = fAlignMask
     646                                            && (   (fAlignMaskAndCtl & ~UINT32_C(0xff))
     647                                                || (fAccess & IEM_ACCESS_ATOMIC)
     648                                                || (pReNative->fExec & IEM_F_X86_AC) );
     649            if (fStrictAlignmentCheck)
     650            {
     651                /* test regflat, fAlignMask */
     652                off = iemNativeEmitTestAnyBitsInGpr8Ex(pCodeBuf, off, idxRegFlatPtr, fAlignMask);
    595653
    596654#ifndef IEM_WITH_TLB_STATISTICS
    597             /* jnz tlbmiss */
    598             off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
     655                /* jnz tlbmiss */
     656                off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
    599657#else
    600             /* jz  1F; inc stat; jmp tlbmiss */
    601             uint32_t const offFixup1 = off;
    602             off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 16, kIemNativeInstrCond_e);
    603             off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
    604                                                       offVCpuTlb + RT_UOFFSETOF(IEMTLB, cTlbNativeMissAlignment));
    605             off = iemNativeEmitJmpToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss);
    606             iemNativeFixupFixedJump(pReNative, offFixup1, off);
    607 #endif
    608         }
    609 
    610         /*
    611          * 2b. Check that it's not crossing a page boundary if the access is
    612          *     larger than the alignment mask or if we didn't do the strict
    613          *     alignment check above.
    614          */
    615         if (   cbMem > 1
    616             && (   !fStrictAlignmentCheck
    617                 || cbMem > fAlignMask + 1U))
    618         {
    619             /* reg1 = regflat & 0xfff */
    620             off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1,/*=*/ idxRegFlatPtr,/*&*/ GUEST_PAGE_OFFSET_MASK);
    621             /* cmp reg1, GUEST_PAGE_SIZE - cbMem */
    622             off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, GUEST_PAGE_SIZE - cbMem);
     658                /* jz  1F; inc stat; jmp tlbmiss */
     659                uint32_t const offFixup1 = off;
     660                off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 16, kIemNativeInstrCond_e);
     661                off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
     662                                                          offVCpuTlb + RT_UOFFSETOF(IEMTLB, cTlbNativeMissAlignment));
     663                off = iemNativeEmitJmpToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss);
     664                iemNativeFixupFixedJump(pReNative, offFixup1, off);
     665#endif
     666            }
     667
     668            /*
     669             * 2b. Check that it's not crossing a page boundary if the access is
     670             *     larger than the alignment mask or if we didn't do the strict
     671             *     alignment check above.
     672             */
     673            if (   cbMem > 1
     674                && (   !fStrictAlignmentCheck
     675                    || cbMem > fAlignMask + 1U))
     676            {
     677                /* reg1 = regflat & 0xfff */
     678                off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1,/*=*/ idxRegFlatPtr,/*&*/ GUEST_PAGE_OFFSET_MASK);
     679                /* cmp reg1, GUEST_PAGE_SIZE - cbMem */
     680                off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, GUEST_PAGE_SIZE - cbMem);
    623681#ifndef IEM_WITH_TLB_STATISTICS
    624             /* ja  tlbmiss */
    625             off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
     682                /* ja  tlbmiss */
     683                off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
    626684#else
    627             /* jbe 1F; inc stat; jmp tlbmiss */
    628             uint32_t const offFixup1 = off;
    629             off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 16, kIemNativeInstrCond_be);
    630             off = iemNativeEmitIncU32CounterInVCpuEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
    631                                                      offVCpuTlb + RT_UOFFSETOF(IEMTLB, cTlbNativeMissCrossPage));
    632             off = iemNativeEmitJmpToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss);
    633             iemNativeFixupFixedJump(pReNative, offFixup1, off);
    634 #endif
     685                /* jbe 1F; inc stat; jmp tlbmiss */
     686                uint32_t const offFixup1 = off;
     687                off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 16, kIemNativeInstrCond_be);
     688                off = iemNativeEmitIncU32CounterInVCpuEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
     689                                                         offVCpuTlb + RT_UOFFSETOF(IEMTLB, cTlbNativeMissCrossPage));
     690                off = iemNativeEmitJmpToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss);
     691                iemNativeFixupFixedJump(pReNative, offFixup1, off);
     692#endif
     693            }
    635694        }
    636695    }
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette