[96108] | 1 | /* $Id: fma.cpp 98103 2023-01-17 14:15:46Z vboxsync $ */
|
---|
| 2 | /** @file
|
---|
| 3 | * IPRT - No-CRT - fma().
|
---|
| 4 | */
|
---|
| 5 |
|
---|
| 6 | /*
|
---|
[98103] | 7 | * Copyright (C) 2022-2023 Oracle and/or its affiliates.
|
---|
[96108] | 8 | *
|
---|
[96407] | 9 | * This file is part of VirtualBox base platform packages, as
|
---|
| 10 | * available from https://www.virtualbox.org.
|
---|
[96108] | 11 | *
|
---|
[96407] | 12 | * This program is free software; you can redistribute it and/or
|
---|
| 13 | * modify it under the terms of the GNU General Public License
|
---|
| 14 | * as published by the Free Software Foundation, in version 3 of the
|
---|
| 15 | * License.
|
---|
| 16 | *
|
---|
| 17 | * This program is distributed in the hope that it will be useful, but
|
---|
| 18 | * WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
| 20 | * General Public License for more details.
|
---|
| 21 | *
|
---|
| 22 | * You should have received a copy of the GNU General Public License
|
---|
| 23 | * along with this program; if not, see <https://www.gnu.org/licenses>.
|
---|
| 24 | *
|
---|
[96108] | 25 | * The contents of this file may alternatively be used under the terms
|
---|
| 26 | * of the Common Development and Distribution License Version 1.0
|
---|
[96407] | 27 | * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
|
---|
| 28 | * in the VirtualBox distribution, in which case the provisions of the
|
---|
[96108] | 29 | * CDDL are applicable instead of those of the GPL.
|
---|
| 30 | *
|
---|
| 31 | * You may elect to license modified versions of this file under the
|
---|
| 32 | * terms and conditions of either the GPL or the CDDL or both.
|
---|
[96407] | 33 | *
|
---|
| 34 | * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
|
---|
[96108] | 35 | */
|
---|
| 36 |
|
---|
| 37 |
|
---|
| 38 | /*********************************************************************************************************************************
|
---|
| 39 | * Header Files *
|
---|
| 40 | *********************************************************************************************************************************/
|
---|
| 41 | #define IPRT_NO_CRT_FOR_3RD_PARTY
|
---|
| 42 | #include "internal/nocrt.h"
|
---|
| 43 | #include <iprt/nocrt/math.h>
|
---|
| 44 | #include <iprt/assertcompile.h>
|
---|
| 45 | #if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
|
---|
| 46 | # include <iprt/asm-amd64-x86.h>
|
---|
| 47 | # include <iprt/x86.h>
|
---|
| 48 | #endif
|
---|
| 49 | #include <softfloat.h>
|
---|
| 50 |
|
---|
| 51 |
|
---|
| 52 | /*********************************************************************************************************************************
|
---|
| 53 | * External Symbols *
|
---|
| 54 | *********************************************************************************************************************************/
|
---|
| 55 | DECLASM(double) rtNoCrtMathFma3(double rdFactor1, double rdFactor2, double rdAddend);
|
---|
| 56 | DECLASM(double) rtNoCrtMathFma4(double rdFactor1, double rdFactor2, double rdAddend);
|
---|
| 57 |
|
---|
| 58 |
|
---|
| 59 | #undef fma
|
---|
| 60 | double RT_NOCRT(fma)(double rdFactor1, double rdFactor2, double rdAddend)
|
---|
| 61 | {
|
---|
| 62 | /*
|
---|
| 63 | * We prefer using native FMA instructions when available.
|
---|
| 64 | */
|
---|
| 65 | #if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
|
---|
| 66 | typedef enum { kCpuDetect = 0, kCpuWithFma3, kCpuWithFma4, kCpuWithoutFma } CPUFMASUPPORT;
|
---|
| 67 | static CPUFMASUPPORT volatile s_enmSup = kCpuDetect;
|
---|
| 68 | CPUFMASUPPORT enmSup = s_enmSup;
|
---|
| 69 | if (enmSup != kCpuDetect)
|
---|
| 70 | { }
|
---|
| 71 | else
|
---|
| 72 | {
|
---|
| 73 | if (ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_FMA)
|
---|
| 74 | enmSup = kCpuWithFma3;
|
---|
| 75 | else if (ASMCpuId_ECX(UINT32_C(0x80000001)) & X86_CPUID_AMD_FEATURE_ECX_FMA4)
|
---|
| 76 | enmSup = kCpuWithFma4;
|
---|
| 77 | else
|
---|
| 78 | enmSup = kCpuWithoutFma;
|
---|
| 79 | s_enmSup = enmSup;
|
---|
| 80 | }
|
---|
| 81 | if (enmSup == kCpuWithFma3)
|
---|
| 82 | return rtNoCrtMathFma3(rdFactor1, rdFactor2, rdAddend);
|
---|
| 83 | if (enmSup == kCpuWithFma4)
|
---|
| 84 | return rtNoCrtMathFma4(rdFactor1, rdFactor2, rdAddend);
|
---|
| 85 | #endif
|
---|
| 86 |
|
---|
| 87 | /*
|
---|
| 88 | * Fall back on SoftFloat.
|
---|
| 89 | */
|
---|
| 90 | AssertCompile(sizeof(rdFactor1) == sizeof(RTFLOAT64U));
|
---|
| 91 | softfloat_state_t State = SOFTFLOAT_STATE_INIT_DEFAULTS(); /** @todo init from MXCSR/FCW */
|
---|
| 92 | union { RTFLOAT64U Iprt; float64_t SoftFloat; } uFactor1, uFactor2, uAddend, uResult;
|
---|
| 93 | uFactor1.Iprt.rd = rdFactor1;
|
---|
| 94 | uFactor2.Iprt.rd = rdFactor2;
|
---|
| 95 | uAddend.Iprt.rd = rdAddend;
|
---|
| 96 | uResult.SoftFloat = f64_mulAdd(uFactor1.SoftFloat, uFactor2.SoftFloat, uAddend.SoftFloat, &State);
|
---|
| 97 | return uResult.Iprt.rd;
|
---|
| 98 | }
|
---|
| 99 | RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(fma);
|
---|
| 100 |
|
---|