log2f.asm

Last change on this file was 106061, checked in by vboxsync, 8 months ago
Copyright year updates by scm.
Property svn:eol-style set to `native`; property svn:keywords set to `Author Date Id Revision`.
File size: 5.8 KB

Line
1	; $Id: log2f.asm 106061 2024-09-16 14:03:52Z vboxsync $
2	;; @file
3	; IPRT - No-CRT log2f - AMD64 & X86.
4	;
5
6	;
7	; Copyright (C) 2006-2024 Oracle and/or its affiliates.
8	;
9	; This file is part of VirtualBox base platform packages, as
10	; available from https://www.virtualbox.org.
11	;
12	; This program is free software; you can redistribute it and/or
13	; modify it under the terms of the GNU General Public License
14	; as published by the Free Software Foundation, in version 3 of the
15	; License.
16	;
17	; This program is distributed in the hope that it will be useful, but
18	; WITHOUT ANY WARRANTY; without even the implied warranty of
19	; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20	; General Public License for more details.
21	;
22	; You should have received a copy of the GNU General Public License
23	; along with this program; if not, see <https://www.gnu.org/licenses>.
24	;
25	; The contents of this file may alternatively be used under the terms
26	; of the Common Development and Distribution License Version 1.0
27	; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28	; in the VirtualBox distribution, in which case the provisions of the
29	; CDDL are applicable instead of those of the GPL.
30	;
31	; You may elect to license modified versions of this file under the
32	; terms and conditions of either the GPL or the CDDL or both.
33	;
34	; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35	;
36
37
38	%define RT_ASM_WITH_SEH64
39	%include "iprt/asmdefs.mac"
40	%include "iprt/x86.mac"
41
42
43	BEGINCODE
44
45	extern NAME(RT_NOCRT(feraiseexcept))
46
;;
; Compute the base-2 logarithm of rf (log2f) using the x87 FPU.
;
; Special inputs, as handled by the code below:
;       - +1.0:                 returns +0.0.
;       - +/-0.0:               raises X86_FSW_ZE via feraiseexcept and returns
;                               RTFLOAT32U_INF_MINUS.
;       - negative and -Inf:    raises X86_FSW_IE via feraiseexcept and returns
;                               RTFLOAT32U_QNAN_MINUS.
;       - +Inf and NaN:         returns the input (xmm0 is left untouched on
;                               AMD64, the loaded st0 is returned on x86).
;
; @returns  st(0) / xmm0
; @param    rf      [xSP + xCB*2] / xmm0    (x86: read as [xBP + xCB*2] after the prologue)
;
RT_NOCRT_BEGINPROC log2f
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 20h                    ; Scratch: [xBP - 10h] for xmm0 <-> st0 moves, [xSP] for the x86 call argument.
        SEH64_ALLOCATE_STACK 20h
        SEH64_END_PROLOGUE

        ;
        ; Load the input into st0.
        ;
%ifdef RT_ARCH_AMD64
        movss   [xBP - 10h], xmm0
        fld     dword [xBP - 10h]
%else
        fld     dword [xBP + xCB*2]
%endif

        ;
        ; Weed out non-normal values.
        ;
        fxam
        fnstsw  ax
        mov     cx, ax                      ; Keep the unmasked status word, C1 (sign) is tested further down.
        and     ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
        cmp     ax, X86_FSW_C2              ; Normal finite number (excluding zero)
        je      .finite
        cmp     ax, X86_FSW_C3              ; Zero
        je      .zero
        cmp     ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals
        je      .finite
        cmp     ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity.
        je      .inf
        jmp     .nan

.finite:
        ; Negative number?
        test    cx, X86_FSW_C1
        jnz     .negative

        ; Is it +1.0?
        fld1
        fcomip  st1                         ; Compares 1.0 with the input and pops the 1.0 again.
        jz      .plus_one

        ;
        ; The fyl2xp1 instruction (ST1=ST1*log2(ST0+1.0), popping ST0) has a
        ; valid ST0 range of 1 - sqrt(0.5) (approx 0.29289321881) on both
        ; sides of zero.  We try use it if we can.
        ;
.above_one:
        ; For both fyl2x and fyl2xp1 we need st1=1.0.
        fld1
        fxch    st0, st1                    ; -> st0=input; st1=1.0

        ; Check if the input is within the fyl2xp1 range.
        fld     qword [.s_r64AbsFyL2xP1InputMax xWrtRIP]
        fcomip  st0, st1                    ; max vs input, popping max.
        jbe     .cannot_use_fyl2xp1         ; max <= input

        fld     qword [.s_r64AbsFyL2xP1InputMin xWrtRIP]
        fcomip  st0, st1                    ; min vs input, popping min.
        jae     .cannot_use_fyl2xp1         ; min >= input

        ; Do the calculation.
.use_fyl2xp1:
        fsub    st0, st1                    ; -> st0=input-1; st1=1.0
        fyl2xp1                             ; -> st0=1.0*log2(st0+1.0)
        jmp     .return_val

.cannot_use_fyl2xp1:
        fyl2x                               ; -> st0=1.0*log2(st0)

        ;
        ; Return st0.
        ;
.return_val:
%ifdef RT_ARCH_AMD64
        fstp    dword [xBP - 10h]
        movss   xmm0, [xBP - 10h]
%endif
.return:
        leave
        ret


        ;
        ; +1.0: Return +0.0.
        ;
.plus_one:
        ffreep  st0
        fldz
        jmp     .return_val

        ;
        ; Negative numbers: Return NaN and raise invalid operation.
        ;
.negative:
.minus_inf:
        ; Drop the input so it is not left behind on the x87 stack when we
        ; return (the .zero path does the same).
        ffreep  st0

        ; Raise invalid operation
%ifdef RT_ARCH_X86
        mov     dword [xSP], X86_FSW_IE
%elifdef ASM_CALL64_GCC
        mov     edi, X86_FSW_IE
%elifdef ASM_CALL64_MSC
        mov     ecx, X86_FSW_IE
%else
 %error calling conv.
%endif
        call    NAME(RT_NOCRT(feraiseexcept))

        ; Load NaN
%ifdef RT_ARCH_AMD64
        movss   xmm0, [.s_r32NaN xWrtRIP]
%else
        fld     dword [.s_r32NaN xWrtRIP]
%endif
        jmp     .return

        ;
        ; +/-0.0: Return -Inf and raise divide by zero error.
        ;
.zero:
        ffreep  st0

        ; Raise div/0
%ifdef RT_ARCH_X86
        mov     dword [xSP], X86_FSW_ZE
%elifdef ASM_CALL64_GCC
        mov     edi, X86_FSW_ZE
%elifdef ASM_CALL64_MSC
        mov     ecx, X86_FSW_ZE
%else
 %error calling conv.
%endif
        call    NAME(RT_NOCRT(feraiseexcept))

        ; Load -Inf
%ifdef RT_ARCH_AMD64
        movss   xmm0, [.s_r32MinusInf xWrtRIP]
%else
        fld     dword [.s_r32MinusInf xWrtRIP]
%endif
        jmp     .return

        ;
        ; -Inf: Same as other negative numbers
        ; +Inf: return +Inf.  Join path with NaN.
        ;
.inf:
        test    cx, X86_FSW_C1              ; sign bit
        jnz     .minus_inf

        ;
        ; NaN: Return the input NaN value as is, if we can.
        ;
.nan:
%ifdef RT_ARCH_AMD64
        ffreep  st0                         ; xmm0 still holds the input, so only the x87 copy is dropped.
%endif
        jmp     .return

ALIGNCODE(8)
        ;; The fyl2xp1 instruction only works between +/-(1 - sqrt(0.5)).
        ; These two variables are that range + 1.0, so we can compare directly
        ; with the input w/o any extra fsub and fabs work.
.s_r64AbsFyL2xP1InputMin:
        dq      0.708                       ; -0.292 + 1.0
.s_r64AbsFyL2xP1InputMax:
        dq      1.292                       ; +0.292 + 1.0
.s_r32MinusInf:
        dd      RTFLOAT32U_INF_MINUS
.s_r32NaN:
        dd      RTFLOAT32U_QNAN_MINUS
ENDPROC   RT_NOCRT(log2f)
227

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/Runtime/common/math/log2f.asm

Download in other formats: