[96108] | 1 | ; $Id: fmaf-asm.asm 98103 2023-01-17 14:15:46Z vboxsync $
|
---|
| 2 | ;; @file
|
---|
| 3 | ; IPRT - No-CRT fmaf alternatives - AMD64 & X86.
|
---|
| 4 | ;
|
---|
| 5 |
|
---|
| 6 | ;
|
---|
[98103] | 7 | ; Copyright (C) 2006-2023 Oracle and/or its affiliates.
|
---|
[96108] | 8 | ;
|
---|
[96407] | 9 | ; This file is part of VirtualBox base platform packages, as
|
---|
| 10 | ; available from https://www.virtualbox.org.
|
---|
[96108] | 11 | ;
|
---|
[96407] | 12 | ; This program is free software; you can redistribute it and/or
|
---|
| 13 | ; modify it under the terms of the GNU General Public License
|
---|
| 14 | ; as published by the Free Software Foundation, in version 3 of the
|
---|
| 15 | ; License.
|
---|
| 16 | ;
|
---|
| 17 | ; This program is distributed in the hope that it will be useful, but
|
---|
| 18 | ; WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 19 | ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
| 20 | ; General Public License for more details.
|
---|
| 21 | ;
|
---|
| 22 | ; You should have received a copy of the GNU General Public License
|
---|
| 23 | ; along with this program; if not, see <https://www.gnu.org/licenses>.
|
---|
| 24 | ;
|
---|
[96108] | 25 | ; The contents of this file may alternatively be used under the terms
|
---|
| 26 | ; of the Common Development and Distribution License Version 1.0
|
---|
[96407] | 27 | ; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
|
---|
| 28 | ; in the VirtualBox distribution, in which case the provisions of the
|
---|
[96108] | 29 | ; CDDL are applicable instead of those of the GPL.
|
---|
| 30 | ;
|
---|
| 31 | ; You may elect to license modified versions of this file under the
|
---|
| 32 | ; terms and conditions of either the GPL or the CDDL or both.
|
---|
| 33 | ;
|
---|
[96407] | 34 | ; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
|
---|
| 35 | ;
|
---|
[96108] | 36 |
|
---|
| 37 | %define RT_ASM_WITH_SEH64
|
---|
| 38 | %include "iprt/asmdefs.mac"
|
---|
| 39 |
|
---|
| 40 | BEGINCODE
|
---|
| 41 |
|
---|
| 42 | ;;
|
---|
| 43 | ; Fused multiply-add (r32Factor1 * r32Factor2 + r32Addend) on single precision floats, intel version using the FMA3 instruction vfmadd132ss.
|
---|
| 44 | ; NOTE(review): no CPU feature check is done here; presumably the caller verifies FMA3 support first - confirm.
|
---|
| 45 | ; @returns The result in st(0) on x86 / in xmm0 on AMD64.
|
---|
| 46 | ; @param r32Factor1 x86: [ebp + 08h] / AMD64: xmm0
|
---|
| 47 | ; @param r32Factor2 x86: [ebp + 0ch] / AMD64: xmm1
|
---|
| 48 | ; @param r32Addend x86: [ebp + 10h] / AMD64: xmm2
|
---|
| 49 | BEGINPROC rtNoCrtMathFma3f
|
---|
| 50 | push xBP
|
---|
| 51 | SEH64_PUSH_xBP ; NOTE(review): the SEH64_* macros come from iprt/asmdefs.mac; presumably they record Win64 unwind info for the preceding instruction - confirm.
|
---|
| 52 | mov xBP, xSP
|
---|
| 53 | SEH64_SET_FRAME_xBP 0
|
---|
| 54 | SEH64_END_PROLOGUE
|
---|
| 55 |
|
---|
| 56 | %ifdef RT_ARCH_X86
|
---|
| 57 | movss xmm0, dword [xBP + xCB*2 + 00h] ; x86 only: fetch r32Factor1 from the stack (xCB*2 skips the saved xBP and the return address).
|
---|
| 58 | movss xmm1, dword [xBP + xCB*2 + 04h] ; x86 only: fetch r32Factor2.
|
---|
| 59 | movss xmm2, dword [xBP + xCB*2 + 08h] ; x86 only: fetch r32Addend.
|
---|
| 60 | %endif
|
---|
| 61 |
|
---|
| 62 | vfmadd132ss xmm0, xmm2, xmm1 ; xmm0 = xmm0 * xmm1 + xmm2 (132 = multiply op1 with op3 and add op2), i.e. r32Factor1 * r32Factor2 + r32Addend.
|
---|
| 63 |
|
---|
| 64 | %ifdef RT_ARCH_X86
|
---|
| 65 | sub xSP, 10h ; x86 only: scratch space for moving the result from xmm0 to the FPU stack; released again by the 'leave' below.
|
---|
| 66 | movss [xSP], xmm0 ; Spill the result to the scratch space...
|
---|
| 67 | fld dword [xSP] ; ...and reload it into st(0), which is where this function returns it on x86.
|
---|
| 68 | %endif
|
---|
| 69 | leave ; Restores xSP from xBP and pops the saved xBP.
|
---|
| 70 | ret
|
---|
| 71 | ENDPROC rtNoCrtMathFma3f
|
---|
| 72 |
|
---|
| 73 |
|
---|
| 74 | ;;
|
---|
| 75 | ; Fused multiply-add (r32Factor1 * r32Factor2 + r32Addend) on single precision floats, amd version using the FMA4 instruction vfmaddss.
|
---|
| 76 | ; NOTE(review): no CPU feature check is done here; presumably the caller verifies FMA4 support first - confirm.
|
---|
| 77 | ; @returns The result in st(0) on x86 / in xmm0 on AMD64.
|
---|
| 78 | ; @param r32Factor1 x86: [ebp + 08h] / AMD64: xmm0
|
---|
| 79 | ; @param r32Factor2 x86: [ebp + 0ch] / AMD64: xmm1
|
---|
| 80 | ; @param r32Addend x86: [ebp + 10h] / AMD64: xmm2
|
---|
| 81 | BEGINPROC rtNoCrtMathFma4f
|
---|
| 82 | push xBP
|
---|
| 83 | SEH64_PUSH_xBP ; NOTE(review): the SEH64_* macros come from iprt/asmdefs.mac; presumably they record Win64 unwind info for the preceding instruction - confirm.
|
---|
| 84 | mov xBP, xSP
|
---|
| 85 | SEH64_SET_FRAME_xBP 0
|
---|
| 86 | SEH64_END_PROLOGUE
|
---|
| 87 |
|
---|
| 88 | %ifdef RT_ARCH_X86
|
---|
| 89 | movss xmm0, dword [xBP + xCB*2 + 00h] ; x86 only: fetch r32Factor1 from the stack (xCB*2 skips the saved xBP and the return address).
|
---|
| 90 | movss xmm1, dword [xBP + xCB*2 + 04h] ; x86 only: fetch r32Factor2.
|
---|
| 91 | movss xmm2, dword [xBP + xCB*2 + 08h] ; x86 only: fetch r32Addend.
|
---|
| 92 | %endif
|
---|
| 93 |
|
---|
| 94 | vfmaddss xmm0, xmm0, xmm1, xmm2 ; xmm0 = xmm0 * xmm1 + xmm2, i.e. r32Factor1 * r32Factor2 + r32Addend.
|
---|
| 95 |
|
---|
| 96 | %ifdef RT_ARCH_X86
|
---|
| 97 | sub xSP, 10h ; x86 only: scratch space for moving the result from xmm0 to the FPU stack; released again by the 'leave' below.
|
---|
| 98 | movss [xSP], xmm0 ; Spill the result to the scratch space...
|
---|
| 99 | fld dword [xSP] ; ...and reload it into st(0), which is where this function returns it on x86.
|
---|
| 100 | %endif
|
---|
| 101 | leave ; Restores xSP from xBP and pops the saved xBP.
|
---|
| 102 | ret
|
---|
| 103 | ENDPROC rtNoCrtMathFma4f
|
---|
| 104 |
|
---|