Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

powcore.asm

Last change on this file was 98103, checked in by vboxsync, 16 months ago
Copyright year updates by scm.
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 21.1 KB

Line
1	; $Id: powcore.asm 98103 2023-01-17 14:15:46Z vboxsync $
2	;; @file
3	; IPRT - No-CRT common pow code - AMD64 & X86.
4	;
5
6	;
7	; Copyright (C) 2006-2023 Oracle and/or its affiliates.
8	;
9	; This file is part of VirtualBox base platform packages, as
10	; available from https://www.virtualbox.org.
11	;
12	; This program is free software; you can redistribute it and/or
13	; modify it under the terms of the GNU General Public License
14	; as published by the Free Software Foundation, in version 3 of the
15	; License.
16	;
17	; This program is distributed in the hope that it will be useful, but
18	; WITHOUT ANY WARRANTY; without even the implied warranty of
19	; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20	; General Public License for more details.
21	;
22	; You should have received a copy of the GNU General Public License
23	; along with this program; if not, see <https://www.gnu.org/licenses>.
24	;
25	; The contents of this file may alternatively be used under the terms
26	; of the Common Development and Distribution License Version 1.0
27	; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28	; in the VirtualBox distribution, in which case the provisions of the
29	; CDDL are applicable instead of those of the GPL.
30	;
31	; You may elect to license modified versions of this file under the
32	; terms and conditions of either the GPL or the CDDL or both.
33	;
34	; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35	;
36
37
38	%define RT_ASM_WITH_SEH64
39	%include "iprt/asmdefs.mac"
40	%include "iprt/x86.mac"
41
42
43	BEGINCODE
44
45	extern NAME(RT_NOCRT(feraiseexcept))
46
47	;;
48	; Call feraiseexcept(%1)
49	%macro CALL_feraiseexcept_WITH 1
50	%ifdef RT_ARCH_X86
51	mov dword [xSP], X86_FSW_IE
52	%elifdef ASM_CALL64_GCC
53	mov edi, X86_FSW_IE
54	%elifdef ASM_CALL64_MSC
55	mov ecx, X86_FSW_IE
56	%else
57	%error calling conv.
58	%endif
59	call NAME(RT_NOCRT(feraiseexcept))
60	%endmacro
61
62
63	;;
64	; Compute the st1 to the power of st0.
65	;
66	; @returns st(0) = result
67	; eax = what's being returned:
68	; 0 - Just a value.
69	; 1 - The rBase value. Caller may take steps to ensure it's exactly the same.
70	; 2 - The rExp value. Caller may take steps to ensure it's exactly the same.
71	; @param rBase/st1 The base.
72	; @param rExp/st0 The exponent
73	; @param fFxamBase/dx The status flags after fxam(rBase).
74	; @param enmType/ebx The original parameter and return types:
75	; 0 - 32-bit / float
76	; 1 - 64-bit / double
77	; 2 - 80-bit / long double
78	;
79	BEGINPROC rtNoCrtMathPowCore
80	push xBP
81	SEH64_PUSH_xBP
82	mov xBP, xSP
83	SEH64_SET_FRAME_xBP 0
84	sub xSP, 30h
85	SEH64_ALLOCATE_STACK 30h
86	SEH64_END_PROLOGUE
87
88	;
89	; Weed out special values, starting with the exponent.
90	;
91	fxam
92	fnstsw ax
93	mov cx, ax ; cx=fxam(exp)
94
95	and ax, X86_FSW_C3 \| X86_FSW_C2 \| X86_FSW_C0
96	cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero)
97	je .exp_finite
98	cmp ax, X86_FSW_C3 ; Zero
99	je .exp_zero
100	cmp ax, X86_FSW_C3 \| X86_FSW_C2 ; Denormals
101	je .exp_finite
102	cmp ax, X86_FSW_C0 \| X86_FSW_C2 ; Infinity.
103	je .exp_inf
104	jmp .exp_nan
105
106	.exp_finite:
107	;
108	; Detect special base values.
109	;
110	mov ax, dx ; ax=fxam(base)
111	and ax, X86_FSW_C3 \| X86_FSW_C2 \| X86_FSW_C0
112	cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero)
113	je .base_finite
114	cmp ax, X86_FSW_C3 ; Zero
115	je .base_zero
116	cmp ax, X86_FSW_C3 \| X86_FSW_C2 ; Denormals
117	je .base_finite
118	cmp ax, X86_FSW_C0 \| X86_FSW_C2 ; Infinity.
119	je .base_inf
120	jmp .base_nan
121
122	.base_finite:
123	;
124	; 1 in the base is also special.
125	; Rule 6 (see below): base == +1 and exponent = whatever: Return +1.0
126	;
127	fld1
128	fcomip st0, st2
129	je .return_base_value
130
131	;
132	; Check if the exponent is an integer value we can handle in a 64-bit
133	; GRP as that is simpler to handle accurately.
134	;
135	; In 64-bit integer range?
136	fld tword [.s_r80MaxInt xWrtRIP]
137	fcomip st0, st1
138	jb .not_integer_exp
139
140	fld tword [.s_r80MinInt xWrtRIP]
141	fcomip st0, st1
142	ja .not_integer_exp
143
144	; Convert it to integer.
145	fld st0 ; -> st0=exp; st1=exp; st2=base
146	fistp qword [xBP - 8] ; Save and pop 64-bit int (no non-popping version of this instruction).
147
148	fild qword [xBP - 8] ; Load it again for comparison.
149	fucomip st0, st1 ; Compare integer exp and floating point exp to see if they are the same. Pop.
150	jne .not_integer_exp
151
152
153	;
154	;
155	; Ok, we've got an integer exponent value in that fits into a 64-bit.
156	; We'll multiply the base exponention bit by exponention bit, applying
157	; it as a factor for bits that are set.
158	;
159	;
160	.integer_exp:
161	; Load the integer value into edx:exx / rdx and ditch the floating point exponent.
162	mov xDX, [xBP - 8]
163	%ifdef RT_ARCH_X86
164	mov eax, [xBP - 8 + 4]
165	%endif
166	ffreep st0 ; -> st0=base;
167
168	; Load a 1 onto the stack, we'll need it below as well as for converting
169	; a negative exponent to a positive one.
170	fld1 ; -> st0=1.0; st1=base;
171
172	; If the exponent is negative, negate it and change base to 1/base.
173	or xDX, xDX
174	jns .integer_exp_positive
175	neg xDX
176	%ifdef RT_ARCH_X86
177	neg eax
178	sbb edx, 0
179	%endif
180	fdivr st1, st0 ; -> st0=1.0; st1=1/base
181	.integer_exp_positive:
182
183	;
184	; We'll process edx:eax / rdx bit by bit till it's zero, using st0 for
185	; the multiplication factor corresponding to the current exponent bit
186	; and st1 as the result.
187	;
188	fxch ; -> st0=base; st1=1.0;
189	.integer_exp_loop:
190	%ifdef RT_ARCH_X86
191	shrd eax, edx, 1
192	%else
193	shr rdx, 1
194	%endif
195	jnc .integer_exp_loop_advance
196	fmul st1, st0
197
198	.integer_exp_loop_advance:
199	; Check if we're done.
200	%ifdef RT_ARCH_AMD64
201	jz .integer_exp_return ; (we will have the flags for the shr rdx above)
202	%else
203	shr edx, 1 ; complete the above shift operation
204
205	mov ecx, edx ; check if edx:eax is zero.
206	or ecx, eax
207	jz .integer_exp_return
208	%endif
209	; Calculate the factor for the next bit.
210	fmul st0, st0
211	jmp .integer_exp_loop
212
213	.integer_exp_return:
214	ffreep st0 ; drop the factor -> st0=result; no st1.
215	jmp .return_val
216
217
218	;
219	;
220	; Non-integer or value was out of range for an int64_t.
221	;
222	; The approach here is the same as in exp.asm, only we have to do the
223	; log2(base) calculation first as it's a parameter and not a constant.
224	;
225	;
226	.not_integer_exp:
227
228	; First reject negative numbers. We still have the fxam(base) status in dx.
229	test dx, X86_FSW_C1
230	jnz .base_negative_non_integer_exp
231
232	; Swap the items on the stack, so we can process the base first.
233	fxch st0, st1 ; -> st0=base; st1=exponent;
234
235	;
236	; From log2.asm:
237	;
238	; The fyl2xp1 instruction (ST1=ST1*log2(ST0+1.0), popping ST0) has a
239	; valid ST0 range of 1(1-sqrt(0.5)) (approx 0.29289321881) on both
240	; sides of zero. We try use it if we can.
241	;
242	.above_one:
243	; For both fyl2xp1 and fyl2xp1 we need st1=1.0.
244	fld1
245	fxch st0, st1 ; -> st0=base; st1=1.0; st2=exponent
246
247	; Check if the input is within the fyl2xp1 range.
248	fld qword [.s_r64AbsFyL2xP1InputMax xWrtRIP]
249	fcomip st0, st1
250	jbe .cannot_use_fyl2xp1
251
252	fld qword [.s_r64AbsFyL2xP1InputMin xWrtRIP]
253	fcomip st0, st1
254	jae .cannot_use_fyl2xp1
255
256	; Do the calculation.
257	.use_fyl2xp1:
258	fsub st0, st1 ; -> st0=base-1; st1=1.0; st2=exponent
259	fyl2xp1 ; -> st0=1.0*log2(base-1.0+1.0); st1=exponent
260	jmp .done_log2
261
262	.cannot_use_fyl2xp1:
263	fyl2x ; -> st0=1.0*log2(base); st1=exponent
264	.done_log2:
265
266	;
267	; From exp.asm:
268	;
269	; Convert to power of 2 and it'll be the same as exp2.
270	;
271	fmulp ; st0=log2(base); st1=exponent -> st0=pow2exp
272
273	;
274	; Split the job in two on the fraction and integer l2base parts.
275	;
276	fld st0 ; Push a copy of the pow2exp on the stack.
277	frndint ; st0 = (int)pow2exp
278	fsub st1, st0 ; st1 = pow2exp - (int)pow2exp; i.e. st1 = fraction, st0 = integer.
279	fxch ; st0 = fraction, st1 = integer.
280
281	; 1. Calculate on the fraction.
282	f2xm1 ; st0 = 2**fraction - 1.0
283	fld1
284	faddp ; st0 = 2**fraction
285
286	; 2. Apply the integer power of two.
287	fscale ; st0 = result; st1 = integer part of pow2exp.
288	fstp st1 ; st0 = result; no st1.
289
290	;
291	; Return st0.
292	;
293	.return_val:
294	xor eax, eax
295	.return:
296	leave
297	ret
298
299
300	;
301	;
302	; pow() has a lot of defined behavior for special values, which is why
303	; this is the largest and most difficult part of the code. :-)
304	;
305	; On https://pubs.opengroup.org/onlinepubs/9699919799/functions/pow.html
306	; there are 21 error conditions listed in the return value section.
307	; The code below refers to this by number.
308	;
309	; When we get here:
310	; dx=fxam(base)
311	; cx=fxam(exponent)
312	; st1=base
313	; st0=exponent
314	;
315
316	;
317	; 1. Finit base < 0 and finit non-interger exponent: -> domain error (#IE) + NaN.
318	;
319	; The non-integer exponent claim might be wrong, as we only check if it
320	; fits into a int64_t register. But, I don't see how we can calculate
321	; it right now.
322	;
323	.base_negative_non_integer_exp:
324	CALL_feraiseexcept_WITH X86_FSW_IE
325	jmp .return_nan
326
327	;
328	; 7. Exponent = +/-0.0, any base value including NaN: return +1.0
329	; Note! According to https://en.cppreference.com/w/c/numeric/math/pow a
330	; domain error (#IE) occur if base=+/-0. Not implemented.
331	.exp_zero:
332	.return_plus_one:
333	fld1
334	jmp .return_pop_pop_val
335
336	;
337	; 6. Exponent = whatever and base = 1: Return 1.0
338	; 10. Exponent = +/-Inf and base = -1: Return 1.0
339	;6+10 => Exponent = +/-Inf and \|base\| = 1: Return 1.0
340	; 11. Exponent = -Inf and \|base\| < 1: Return +Inf
341	; 12. Exponent = -Inf and \|base\| > 1: Return +0
342	; 13. Exponent = +Inf and \|base\| < 1: Return +0
343	; 14. Exponent = +Inf and \|base\| > 1: Return +Inf
344	;
345	; Note! Rule 4 would trigger for the same conditions as 11 when base == 0,
346	; but it's optional to raise div/0 and it's apparently marked as
347	; obsolete in C23, so not implemented.
348	;
349	.exp_inf:
350	; Check if base is NaN or unsupported.
351	and dx, X86_FSW_C3 \| X86_FSW_C2 \| X86_FSW_C0 ; fxam(base)
352	cmp dx, X86_FSW_C0
353	jbe .return_base_nan
354
355	; Calc fabs(base) and replace the exponent with 1.0 as we're very likely to need this here.
356	ffreep st0
357	fabs
358	fld1 ; st0=1.0; st1=\|rdBase\|
359	fcomi st0, st1
360	je .return_plus_one ; Matches rule 6 + 10 (base is +/-1).
361	ja .exp_inf_base_smaller_than_one
362	.exp_inf_base_larger_than_one:
363	test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign
364	jz .return_plus_inf ; Matches rule 14 (exponent is +Inf).
365	jmp .return_plus_zero ; Matches rule 12 (exponent is -Inf).
366
367	.exp_inf_base_smaller_than_one:
368	test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign
369	jnz .return_plus_inf ; Matches rule 11 (exponent is -Inf).
370	jmp .return_plus_zero ; Matches rule 13 (exponent is +Inf).
371
372	;
373	; 6. Exponent = whatever and base = 1: Return 1.0
374	; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN.
375	;
376	.exp_nan:
377	; Check if base is a number and possible 1.
378	test dx, X86_FSW_C2 ; dx=fxam(base); C2 is set for finite number, infinity and denormals.
379	jz .return_exp_nan
380	fld1
381	fcomip st0, st2
382	jne .return_exp_nan
383	jmp .return_plus_one
384
385	;
386	; 4a. base == +/-0.0 and exp < 0 and exp is odd integer: Return +/-Inf, raise div/0.
387	; 4b. base == +/-0.0 and exp < 0 and exp is not odd int: Return +Inf, raise div/0.
388	; 8. base == +/-0.0 and exp > 0 and exp is odd integer: Return +/-0.0
389	; 9. base == +/-0.0 and exp > 0 and exp is not odd int: Return +0
390	;
391	; Note! Exponent must be finite and non-zero if we get here.
392	;
393	.base_zero:
394	fldz
395	fcomip st0, st1
396	jbe .base_zero_plus_exp
397	.base_zero_minus_exp:
398	mov cx, dx ; stashing fxam(base) in CX because EDX is trashed by .is_exp_odd_integer
399	call .is_exp_odd_integer ; trashes EDX but no ECX.
400	or eax, eax
401	jz .base_zero_minus_exp_not_odd_int
402
403	; Matching 4a.
404	.base_zero_minus_exp_odd_int:
405	test cx, X86_FSW_C1 ; base sign
406	jz .raise_de_and_return_plus_inf
407	.raise_de_and_return_minus_inf:
408	CALL_feraiseexcept_WITH X86_FSW_DE
409	jmp .return_minus_inf
410	.raise_de_and_return_plus_inf:
411	CALL_feraiseexcept_WITH X86_FSW_DE
412	jmp .return_plus_inf
413
414	; Matching 4b.
415	.base_zero_minus_exp_not_odd_int:
416	CALL_feraiseexcept_WITH X86_FSW_DE
417	jmp .return_plus_inf
418
419	.base_zero_plus_exp:
420	call .is_exp_odd_integer
421	or eax, eax
422	jnz .return_base_value ; Matching 8
423	.return_plus_zero: ; Matching 9
424	fldz
425	jmp .return_pop_pop_val
426
427	;
428	; 15. base == -Inf and exp < 0 and exp is odd integer: Return -0
429	; 16. base == -Inf and exp < 0 and exp is not odd int: Return +0
430	; 17. base == -Inf and exp > 0 and exp is odd integer: Return -Inf
431	; 18. base == -Inf and exp > 0 and exp is not odd int: Return +Inf
432	; 19. base == +Inf and exp < 0: Return +0
433	; 20. base == +Inf and exp > 0: Return +Inf
434	;
435	; Note! Exponent must be finite and non-zero if we get here.
436	;
437	.base_inf:
438	fldz
439	fcomip st0, st1
440	jbe .base_inf_plus_exp
441	.base_inf_minus_exp:
442	test dx, X86_FSW_C1
443	jz .return_plus_zero ; Matches 19 (base == +Inf).
444	.base_minus_inf_minus_exp:
445	call .is_exp_odd_integer
446	or eax, eax
447	jz .return_plus_zero ; Matches 16 (exp not odd and < 0, base == -Inf)
448	.return_minus_zero: ; Matches 15 (exp is odd and < 0, base == -Inf)
449	fldz
450	fchs
451	jmp .return_pop_pop_val
452
453	.base_inf_plus_exp:
454	test dx, X86_FSW_C1
455	jz .return_plus_inf ; Matches 20 (base == +Inf).
456	.base_minus_inf_plus_exp:
457	call .is_exp_odd_integer
458	or eax, eax
459	jnz .return_minus_inf ; Matches 17 (exp is odd and > 0, base == +Inf)
460	jmp .return_plus_inf ; Matches 18 (exp not odd and > 0, base == +Inf)
461
462	;
463	; Return the exponent NaN (or whatever) value.
464	;
465	.return_exp_nan:
466	fld st0
467	mov eax, 2 ; return param 2
468	jmp .return_pop_pop_val_with_eax
469
470	;
471	; Return the base NaN (or whatever) value.
472	;
473	.return_base_nan:
474	.return_base_value:
475	.base_nan: ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN.
476	fld st1
477	mov eax, 1 ; return param 1
478	jmp .return_pop_pop_val_with_eax
479
480	;
481	; Pops the two values off the FPU stack and returns NaN.
482	;
483	.return_nan:
484	fld qword [.s_r64QNan xWrtRIP]
485	jmp .return_pop_pop_val
486
487	;
488	; Pops the two values off the FPU stack and returns +Inf.
489	;
490	.return_plus_inf:
491	fld qword [.s_r64PlusInf xWrtRIP]
492	jmp .return_pop_pop_val
493
494	;
495	; Pops the two values off the FPU stack and returns -Inf.
496	;
497	.return_minus_inf:
498	fld qword [.s_r64MinusInf xWrtRIP]
499	jmp .return_pop_pop_val
500
501	;
502	; Return st0, remove st1 and st2.
503	;
504	.return_pop_pop_val:
505	xor eax, eax
506	.return_pop_pop_val_with_eax:
507	fstp st2
508	ffreep st0
509	jmp .return
510
511
512	ALIGNCODE(8)
513	.s_r80MaxInt:
514	dt +9223372036854775807.0
515
516	ALIGNCODE(8)
517	.s_r80MinInt:
518	dt -9223372036854775807.0
519
520	ALIGNCODE(8)
521	;; The fyl2xp1 instruction only works between +/-1(1-sqrt(0.5)).
522	; These two variables is that range + 1.0, so we can compare directly
523	; with the input w/o any extra fsub and fabs work.
524	.s_r64AbsFyL2xP1InputMin:
525	dq 0.708 ; -0.292 + 1.0
526	.s_r64AbsFyL2xP1InputMax:
527	dq 1.292
528
529	.s_r64QNan:
530	dq RTFLOAT64U_QNAN_MINUS
531	.s_r64PlusInf:
532	dq RTFLOAT64U_INF_PLUS
533	.s_r64MinusInf:
534	dq RTFLOAT64U_INF_MINUS
535
536	;;
537	; Sub-function that checks if the exponent (st0) is an odd integer or not.
538	;
539	; @returns eax = 1 if odd, 0 if even or not integer.
540	; @uses eax, edx, eflags.
541	;
542	.is_exp_odd_integer:
543	;
544	; Save the FPU enviornment and mask all exceptions.
545	;
546	fnstenv [xBP - 30h]
547	mov ax, [xBP - 30h + X86FSTENV32P.FCW]
548	or word [xBP - 30h + X86FSTENV32P.FCW], X86_FCW_MASK_ALL
549	fldcw [xBP - 30h + X86FSTENV32P.FCW]
550	mov [xBP - 30h + X86FSTENV32P.FCW], ax
551
552	;
553	; Convert to 64-bit integer (probably not 100% correct).
554	;
555	fld st0 ; -> st0=exponent st1=exponent; st2=base;
556	fistp qword [xBP - 10h]
557	fild qword [xBP - 10h] ; -> st0=int(exponent) st1=exponent; st2=base;
558	fcomip st0, st1 ; -> st0=exponent; st1=base;
559	jne .is_exp_odd_integer__return_false ; jump if not integer.
560	mov xAX, [xBP - 10h]
561	%ifdef
562	mov edx, [xBP - 10h + 4]
563	%endif
564
565	;
566	; Check the lowest bit if it might be odd.
567	; This works both for positive and negative numbers.
568	;
569	test al, 1
570	jz .is_exp_odd_integer__return_false ; jump if even.
571
572	;
573	; If the result is negative, convert to positive.
574	;
575	%ifdef RT_ARCH_AMD64
576	bt rax, 63
577	%else
578	bt edx, 31
579	%endif
580	jnc .is_exp_odd_integer__positive
581	%ifdef RT_ARCH_AMD64
582	neg xAX
583	%else
584	neg edx
585	neg eax
586	sbb edx, 0
587	%endif
588	.is_exp_odd_integer__positive:
589
590	;
591	; Now find the most significant bit in the value so we can verify that
592	; the odd bit was part of the mantissa/fraction of the input.
593	;
594	cmp bl, 3 ; Skip if 80-bit input, as it has a 64-bit mantissa which
595	je .is_exp_odd_integer__return_true ; makes it a 1 bit more precision than out integer reg(s).
596
597	%ifdef RT_ARCH_AMD64
598	bsr rax, rax
599	%else
600	bsr edx, edx
601	jnz .is_exp_odd_integer__high_dword_is_zero
602	lea eax, [edx + 20h]
603	jmp .is_exp_odd_integer__first_bit_in_eax
604	.is_exp_odd_integer__high_dword_is_zero:
605	bsr eax, eax
606	.is_exp_odd_integer__first_bit_in_eax:
607	%endif
608	;
609	; The limit is 53 for double precision (one implicit bit + 52 bits fraction),
610	; and 24 for single precision types.
611	;
612	mov ah, 53 ; RTFLOAT64U_FRACTION_BITS + 1
613	cmp bl, 0
614	jne .is_exp_odd_integer__is_double_limit
615	mov ah, 24 ; RTFLOAT32U_FRACTION_BITS + 1
616	.is_exp_odd_integer__is_double_limit:
617
618	cmp al, ah
619	jae .is_exp_odd_integer__return_false
620	mov eax, 1
621
622	; Return.
623	.is_exp_odd_integer__return_true:
624	jmp .is_exp_odd_integer__return
625	.is_exp_odd_integer__return_false:
626	xor eax, eax
627	.is_exp_odd_integer__return:
628	ffreep st0
629	fldenv [xBP - 30h]
630	ret
631
632	ENDPROC rtNoCrtMathPowCore
633

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: vbox/trunk/src/VBox/Runtime/common/math/powcore.asm

Download in other formats: