VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/math/powcore.asm

Last change on this file was 98103, checked in by vboxsync, 16 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 21.1 KB
Line 
1; $Id: powcore.asm 98103 2023-01-17 14:15:46Z vboxsync $
2;; @file
3; IPRT - No-CRT common pow code - AMD64 & X86.
4;
5
6;
7; Copyright (C) 2006-2023 Oracle and/or its affiliates.
8;
9; This file is part of VirtualBox base platform packages, as
10; available from https://www.virtualbox.org.
11;
12; This program is free software; you can redistribute it and/or
13; modify it under the terms of the GNU General Public License
14; as published by the Free Software Foundation, in version 3 of the
15; License.
16;
17; This program is distributed in the hope that it will be useful, but
18; WITHOUT ANY WARRANTY; without even the implied warranty of
19; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20; General Public License for more details.
21;
22; You should have received a copy of the GNU General Public License
23; along with this program; if not, see <https://www.gnu.org/licenses>.
24;
25; The contents of this file may alternatively be used under the terms
26; of the Common Development and Distribution License Version 1.0
27; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28; in the VirtualBox distribution, in which case the provisions of the
29; CDDL are applicable instead of those of the GPL.
30;
31; You may elect to license modified versions of this file under the
32; terms and conditions of either the GPL or the CDDL or both.
33;
34; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35;
36
37
38%define RT_ASM_WITH_SEH64
39%include "iprt/asmdefs.mac"
40%include "iprt/x86.mac"
41
42
43BEGINCODE
44
45extern NAME(RT_NOCRT(feraiseexcept))
46
47;;
48; Call feraiseexcept(%1)
49%macro CALL_feraiseexcept_WITH 1
50 %ifdef RT_ARCH_X86
51 mov dword [xSP], X86_FSW_IE
52 %elifdef ASM_CALL64_GCC
53 mov edi, X86_FSW_IE
54 %elifdef ASM_CALL64_MSC
55 mov ecx, X86_FSW_IE
56 %else
57 %error calling conv.
58 %endif
59 call NAME(RT_NOCRT(feraiseexcept))
60%endmacro
61
62
63;;
64; Compute the st1 to the power of st0.
65;
66; @returns st(0) = result
67; eax = what's being returned:
68; 0 - Just a value.
69; 1 - The rBase value. Caller may take steps to ensure it's exactly the same.
70; 2 - The rExp value. Caller may take steps to ensure it's exactly the same.
71; @param rBase/st1 The base.
72; @param rExp/st0 The exponent
73; @param fFxamBase/dx The status flags after fxam(rBase).
74; @param enmType/ebx The original parameter and return types:
75; 0 - 32-bit / float
76; 1 - 64-bit / double
77; 2 - 80-bit / long double
78;
79BEGINPROC rtNoCrtMathPowCore
80 push xBP
81 SEH64_PUSH_xBP
82 mov xBP, xSP
83 SEH64_SET_FRAME_xBP 0
84 sub xSP, 30h
85 SEH64_ALLOCATE_STACK 30h
86 SEH64_END_PROLOGUE
87
88 ;
89 ; Weed out special values, starting with the exponent.
90 ;
91 fxam
92 fnstsw ax
93 mov cx, ax ; cx=fxam(exp)
94
95 and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
96 cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero)
97 je .exp_finite
98 cmp ax, X86_FSW_C3 ; Zero
99 je .exp_zero
100 cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals
101 je .exp_finite
102 cmp ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity.
103 je .exp_inf
104 jmp .exp_nan
105
106.exp_finite:
107 ;
108 ; Detect special base values.
109 ;
110 mov ax, dx ; ax=fxam(base)
111 and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
112 cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero)
113 je .base_finite
114 cmp ax, X86_FSW_C3 ; Zero
115 je .base_zero
116 cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals
117 je .base_finite
118 cmp ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity.
119 je .base_inf
120 jmp .base_nan
121
122.base_finite:
123 ;
124 ; 1 in the base is also special.
125 ; Rule 6 (see below): base == +1 and exponent = whatever: Return +1.0
126 ;
127 fld1
128 fcomip st0, st2
129 je .return_base_value
130
131 ;
132 ; Check if the exponent is an integer value we can handle in a 64-bit
133 ; GRP as that is simpler to handle accurately.
134 ;
135 ; In 64-bit integer range?
136 fld tword [.s_r80MaxInt xWrtRIP]
137 fcomip st0, st1
138 jb .not_integer_exp
139
140 fld tword [.s_r80MinInt xWrtRIP]
141 fcomip st0, st1
142 ja .not_integer_exp
143
144 ; Convert it to integer.
145 fld st0 ; -> st0=exp; st1=exp; st2=base
146 fistp qword [xBP - 8] ; Save and pop 64-bit int (no non-popping version of this instruction).
147
148 fild qword [xBP - 8] ; Load it again for comparison.
149 fucomip st0, st1 ; Compare integer exp and floating point exp to see if they are the same. Pop.
150 jne .not_integer_exp
151
152
153 ;
154 ;
155 ; Ok, we've got an integer exponent value in that fits into a 64-bit.
156 ; We'll multiply the base exponention bit by exponention bit, applying
157 ; it as a factor for bits that are set.
158 ;
159 ;
160.integer_exp:
161 ; Load the integer value into edx:exx / rdx and ditch the floating point exponent.
162 mov xDX, [xBP - 8]
163%ifdef RT_ARCH_X86
164 mov eax, [xBP - 8 + 4]
165%endif
166 ffreep st0 ; -> st0=base;
167
168 ; Load a 1 onto the stack, we'll need it below as well as for converting
169 ; a negative exponent to a positive one.
170 fld1 ; -> st0=1.0; st1=base;
171
172 ; If the exponent is negative, negate it and change base to 1/base.
173 or xDX, xDX
174 jns .integer_exp_positive
175 neg xDX
176%ifdef RT_ARCH_X86
177 neg eax
178 sbb edx, 0
179%endif
180 fdivr st1, st0 ; -> st0=1.0; st1=1/base
181.integer_exp_positive:
182
183 ;
184 ; We'll process edx:eax / rdx bit by bit till it's zero, using st0 for
185 ; the multiplication factor corresponding to the current exponent bit
186 ; and st1 as the result.
187 ;
188 fxch ; -> st0=base; st1=1.0;
189.integer_exp_loop:
190%ifdef RT_ARCH_X86
191 shrd eax, edx, 1
192%else
193 shr rdx, 1
194%endif
195 jnc .integer_exp_loop_advance
196 fmul st1, st0
197
198.integer_exp_loop_advance:
199 ; Check if we're done.
200%ifdef RT_ARCH_AMD64
201 jz .integer_exp_return ; (we will have the flags for the shr rdx above)
202%else
203 shr edx, 1 ; complete the above shift operation
204
205 mov ecx, edx ; check if edx:eax is zero.
206 or ecx, eax
207 jz .integer_exp_return
208%endif
209 ; Calculate the factor for the next bit.
210 fmul st0, st0
211 jmp .integer_exp_loop
212
213.integer_exp_return:
214 ffreep st0 ; drop the factor -> st0=result; no st1.
215 jmp .return_val
216
217
218 ;
219 ;
220 ; Non-integer or value was out of range for an int64_t.
221 ;
222 ; The approach here is the same as in exp.asm, only we have to do the
223 ; log2(base) calculation first as it's a parameter and not a constant.
224 ;
225 ;
226.not_integer_exp:
227
228 ; First reject negative numbers. We still have the fxam(base) status in dx.
229 test dx, X86_FSW_C1
230 jnz .base_negative_non_integer_exp
231
232 ; Swap the items on the stack, so we can process the base first.
233 fxch st0, st1 ; -> st0=base; st1=exponent;
234
235 ;
236 ; From log2.asm:
237 ;
238 ; The fyl2xp1 instruction (ST1=ST1*log2(ST0+1.0), popping ST0) has a
239 ; valid ST0 range of 1(1-sqrt(0.5)) (approx 0.29289321881) on both
240 ; sides of zero. We try use it if we can.
241 ;
242.above_one:
243 ; For both fyl2xp1 and fyl2xp1 we need st1=1.0.
244 fld1
245 fxch st0, st1 ; -> st0=base; st1=1.0; st2=exponent
246
247 ; Check if the input is within the fyl2xp1 range.
248 fld qword [.s_r64AbsFyL2xP1InputMax xWrtRIP]
249 fcomip st0, st1
250 jbe .cannot_use_fyl2xp1
251
252 fld qword [.s_r64AbsFyL2xP1InputMin xWrtRIP]
253 fcomip st0, st1
254 jae .cannot_use_fyl2xp1
255
256 ; Do the calculation.
257.use_fyl2xp1:
258 fsub st0, st1 ; -> st0=base-1; st1=1.0; st2=exponent
259 fyl2xp1 ; -> st0=1.0*log2(base-1.0+1.0); st1=exponent
260 jmp .done_log2
261
262.cannot_use_fyl2xp1:
263 fyl2x ; -> st0=1.0*log2(base); st1=exponent
264.done_log2:
265
266 ;
267 ; From exp.asm:
268 ;
269 ; Convert to power of 2 and it'll be the same as exp2.
270 ;
271 fmulp ; st0=log2(base); st1=exponent -> st0=pow2exp
272
273 ;
274 ; Split the job in two on the fraction and integer l2base parts.
275 ;
276 fld st0 ; Push a copy of the pow2exp on the stack.
277 frndint ; st0 = (int)pow2exp
278 fsub st1, st0 ; st1 = pow2exp - (int)pow2exp; i.e. st1 = fraction, st0 = integer.
279 fxch ; st0 = fraction, st1 = integer.
280
281 ; 1. Calculate on the fraction.
282 f2xm1 ; st0 = 2**fraction - 1.0
283 fld1
284 faddp ; st0 = 2**fraction
285
286 ; 2. Apply the integer power of two.
287 fscale ; st0 = result; st1 = integer part of pow2exp.
288 fstp st1 ; st0 = result; no st1.
289
290 ;
291 ; Return st0.
292 ;
293.return_val:
294 xor eax, eax
295.return:
296 leave
297 ret
298
299
300 ;
301 ;
302 ; pow() has a lot of defined behavior for special values, which is why
303 ; this is the largest and most difficult part of the code. :-)
304 ;
305 ; On https://pubs.opengroup.org/onlinepubs/9699919799/functions/pow.html
306 ; there are 21 error conditions listed in the return value section.
307 ; The code below refers to this by number.
308 ;
309 ; When we get here:
310 ; dx=fxam(base)
311 ; cx=fxam(exponent)
312 ; st1=base
313 ; st0=exponent
314 ;
315
316 ;
317 ; 1. Finit base < 0 and finit non-interger exponent: -> domain error (#IE) + NaN.
318 ;
319 ; The non-integer exponent claim might be wrong, as we only check if it
320 ; fits into a int64_t register. But, I don't see how we can calculate
321 ; it right now.
322 ;
323.base_negative_non_integer_exp:
324 CALL_feraiseexcept_WITH X86_FSW_IE
325 jmp .return_nan
326
327 ;
328 ; 7. Exponent = +/-0.0, any base value including NaN: return +1.0
329 ; Note! According to https://en.cppreference.com/w/c/numeric/math/pow a
330 ; domain error (#IE) occur if base=+/-0. Not implemented.
331.exp_zero:
332.return_plus_one:
333 fld1
334 jmp .return_pop_pop_val
335
336 ;
337 ; 6. Exponent = whatever and base = 1: Return 1.0
338 ; 10. Exponent = +/-Inf and base = -1: Return 1.0
339 ;6+10 => Exponent = +/-Inf and |base| = 1: Return 1.0
340 ; 11. Exponent = -Inf and |base| < 1: Return +Inf
341 ; 12. Exponent = -Inf and |base| > 1: Return +0
342 ; 13. Exponent = +Inf and |base| < 1: Return +0
343 ; 14. Exponent = +Inf and |base| > 1: Return +Inf
344 ;
345 ; Note! Rule 4 would trigger for the same conditions as 11 when base == 0,
346 ; but it's optional to raise div/0 and it's apparently marked as
347 ; obsolete in C23, so not implemented.
348 ;
349.exp_inf:
350 ; Check if base is NaN or unsupported.
351 and dx, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 ; fxam(base)
352 cmp dx, X86_FSW_C0
353 jbe .return_base_nan
354
355 ; Calc fabs(base) and replace the exponent with 1.0 as we're very likely to need this here.
356 ffreep st0
357 fabs
358 fld1 ; st0=1.0; st1=|rdBase|
359 fcomi st0, st1
360 je .return_plus_one ; Matches rule 6 + 10 (base is +/-1).
361 ja .exp_inf_base_smaller_than_one
362.exp_inf_base_larger_than_one:
363 test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign
364 jz .return_plus_inf ; Matches rule 14 (exponent is +Inf).
365 jmp .return_plus_zero ; Matches rule 12 (exponent is -Inf).
366
367.exp_inf_base_smaller_than_one:
368 test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign
369 jnz .return_plus_inf ; Matches rule 11 (exponent is -Inf).
370 jmp .return_plus_zero ; Matches rule 13 (exponent is +Inf).
371
372 ;
373 ; 6. Exponent = whatever and base = 1: Return 1.0
374 ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN.
375 ;
376.exp_nan:
377 ; Check if base is a number and possible 1.
378 test dx, X86_FSW_C2 ; dx=fxam(base); C2 is set for finite number, infinity and denormals.
379 jz .return_exp_nan
380 fld1
381 fcomip st0, st2
382 jne .return_exp_nan
383 jmp .return_plus_one
384
385 ;
386 ; 4a. base == +/-0.0 and exp < 0 and exp is odd integer: Return +/-Inf, raise div/0.
387 ; 4b. base == +/-0.0 and exp < 0 and exp is not odd int: Return +Inf, raise div/0.
388 ; 8. base == +/-0.0 and exp > 0 and exp is odd integer: Return +/-0.0
389 ; 9. base == +/-0.0 and exp > 0 and exp is not odd int: Return +0
390 ;
391 ; Note! Exponent must be finite and non-zero if we get here.
392 ;
393.base_zero:
394 fldz
395 fcomip st0, st1
396 jbe .base_zero_plus_exp
397.base_zero_minus_exp:
398 mov cx, dx ; stashing fxam(base) in CX because EDX is trashed by .is_exp_odd_integer
399 call .is_exp_odd_integer ; trashes EDX but no ECX.
400 or eax, eax
401 jz .base_zero_minus_exp_not_odd_int
402
403 ; Matching 4a.
404.base_zero_minus_exp_odd_int:
405 test cx, X86_FSW_C1 ; base sign
406 jz .raise_de_and_return_plus_inf
407.raise_de_and_return_minus_inf:
408 CALL_feraiseexcept_WITH X86_FSW_DE
409 jmp .return_minus_inf
410.raise_de_and_return_plus_inf:
411 CALL_feraiseexcept_WITH X86_FSW_DE
412 jmp .return_plus_inf
413
414 ; Matching 4b.
415.base_zero_minus_exp_not_odd_int:
416 CALL_feraiseexcept_WITH X86_FSW_DE
417 jmp .return_plus_inf
418
419.base_zero_plus_exp:
420 call .is_exp_odd_integer
421 or eax, eax
422 jnz .return_base_value ; Matching 8
423.return_plus_zero: ; Matching 9
424 fldz
425 jmp .return_pop_pop_val
426
427 ;
428 ; 15. base == -Inf and exp < 0 and exp is odd integer: Return -0
429 ; 16. base == -Inf and exp < 0 and exp is not odd int: Return +0
430 ; 17. base == -Inf and exp > 0 and exp is odd integer: Return -Inf
431 ; 18. base == -Inf and exp > 0 and exp is not odd int: Return +Inf
432 ; 19. base == +Inf and exp < 0: Return +0
433 ; 20. base == +Inf and exp > 0: Return +Inf
434 ;
435 ; Note! Exponent must be finite and non-zero if we get here.
436 ;
437.base_inf:
438 fldz
439 fcomip st0, st1
440 jbe .base_inf_plus_exp
441.base_inf_minus_exp:
442 test dx, X86_FSW_C1
443 jz .return_plus_zero ; Matches 19 (base == +Inf).
444.base_minus_inf_minus_exp:
445 call .is_exp_odd_integer
446 or eax, eax
447 jz .return_plus_zero ; Matches 16 (exp not odd and < 0, base == -Inf)
448.return_minus_zero: ; Matches 15 (exp is odd and < 0, base == -Inf)
449 fldz
450 fchs
451 jmp .return_pop_pop_val
452
453.base_inf_plus_exp:
454 test dx, X86_FSW_C1
455 jz .return_plus_inf ; Matches 20 (base == +Inf).
456.base_minus_inf_plus_exp:
457 call .is_exp_odd_integer
458 or eax, eax
459 jnz .return_minus_inf ; Matches 17 (exp is odd and > 0, base == +Inf)
460 jmp .return_plus_inf ; Matches 18 (exp not odd and > 0, base == +Inf)
461
462 ;
463 ; Return the exponent NaN (or whatever) value.
464 ;
465.return_exp_nan:
466 fld st0
467 mov eax, 2 ; return param 2
468 jmp .return_pop_pop_val_with_eax
469
470 ;
471 ; Return the base NaN (or whatever) value.
472 ;
473.return_base_nan:
474.return_base_value:
475.base_nan: ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN.
476 fld st1
477 mov eax, 1 ; return param 1
478 jmp .return_pop_pop_val_with_eax
479
480 ;
481 ; Pops the two values off the FPU stack and returns NaN.
482 ;
483.return_nan:
484 fld qword [.s_r64QNan xWrtRIP]
485 jmp .return_pop_pop_val
486
487 ;
488 ; Pops the two values off the FPU stack and returns +Inf.
489 ;
490.return_plus_inf:
491 fld qword [.s_r64PlusInf xWrtRIP]
492 jmp .return_pop_pop_val
493
494 ;
495 ; Pops the two values off the FPU stack and returns -Inf.
496 ;
497.return_minus_inf:
498 fld qword [.s_r64MinusInf xWrtRIP]
499 jmp .return_pop_pop_val
500
501 ;
502 ; Return st0, remove st1 and st2.
503 ;
504.return_pop_pop_val:
505 xor eax, eax
506.return_pop_pop_val_with_eax:
507 fstp st2
508 ffreep st0
509 jmp .return
510
511
512ALIGNCODE(8)
513.s_r80MaxInt:
514 dt +9223372036854775807.0
515
516ALIGNCODE(8)
517.s_r80MinInt:
518 dt -9223372036854775807.0
519
520ALIGNCODE(8)
521 ;; The fyl2xp1 instruction only works between +/-1(1-sqrt(0.5)).
522 ; These two variables is that range + 1.0, so we can compare directly
523 ; with the input w/o any extra fsub and fabs work.
524.s_r64AbsFyL2xP1InputMin:
525 dq 0.708 ; -0.292 + 1.0
526.s_r64AbsFyL2xP1InputMax:
527 dq 1.292
528
529.s_r64QNan:
530 dq RTFLOAT64U_QNAN_MINUS
531.s_r64PlusInf:
532 dq RTFLOAT64U_INF_PLUS
533.s_r64MinusInf:
534 dq RTFLOAT64U_INF_MINUS
535
536 ;;
537 ; Sub-function that checks if the exponent (st0) is an odd integer or not.
538 ;
539 ; @returns eax = 1 if odd, 0 if even or not integer.
540 ; @uses eax, edx, eflags.
541 ;
542.is_exp_odd_integer:
543 ;
544 ; Save the FPU enviornment and mask all exceptions.
545 ;
546 fnstenv [xBP - 30h]
547 mov ax, [xBP - 30h + X86FSTENV32P.FCW]
548 or word [xBP - 30h + X86FSTENV32P.FCW], X86_FCW_MASK_ALL
549 fldcw [xBP - 30h + X86FSTENV32P.FCW]
550 mov [xBP - 30h + X86FSTENV32P.FCW], ax
551
552 ;
553 ; Convert to 64-bit integer (probably not 100% correct).
554 ;
555 fld st0 ; -> st0=exponent st1=exponent; st2=base;
556 fistp qword [xBP - 10h]
557 fild qword [xBP - 10h] ; -> st0=int(exponent) st1=exponent; st2=base;
558 fcomip st0, st1 ; -> st0=exponent; st1=base;
559 jne .is_exp_odd_integer__return_false ; jump if not integer.
560 mov xAX, [xBP - 10h]
561%ifdef
562 mov edx, [xBP - 10h + 4]
563%endif
564
565 ;
566 ; Check the lowest bit if it might be odd.
567 ; This works both for positive and negative numbers.
568 ;
569 test al, 1
570 jz .is_exp_odd_integer__return_false ; jump if even.
571
572 ;
573 ; If the result is negative, convert to positive.
574 ;
575%ifdef RT_ARCH_AMD64
576 bt rax, 63
577%else
578 bt edx, 31
579%endif
580 jnc .is_exp_odd_integer__positive
581%ifdef RT_ARCH_AMD64
582 neg xAX
583%else
584 neg edx
585 neg eax
586 sbb edx, 0
587%endif
588.is_exp_odd_integer__positive:
589
590 ;
591 ; Now find the most significant bit in the value so we can verify that
592 ; the odd bit was part of the mantissa/fraction of the input.
593 ;
594 cmp bl, 3 ; Skip if 80-bit input, as it has a 64-bit mantissa which
595 je .is_exp_odd_integer__return_true ; makes it a 1 bit more precision than out integer reg(s).
596
597%ifdef RT_ARCH_AMD64
598 bsr rax, rax
599%else
600 bsr edx, edx
601 jnz .is_exp_odd_integer__high_dword_is_zero
602 lea eax, [edx + 20h]
603 jmp .is_exp_odd_integer__first_bit_in_eax
604.is_exp_odd_integer__high_dword_is_zero:
605 bsr eax, eax
606.is_exp_odd_integer__first_bit_in_eax:
607%endif
608 ;
609 ; The limit is 53 for double precision (one implicit bit + 52 bits fraction),
610 ; and 24 for single precision types.
611 ;
612 mov ah, 53 ; RTFLOAT64U_FRACTION_BITS + 1
613 cmp bl, 0
614 jne .is_exp_odd_integer__is_double_limit
615 mov ah, 24 ; RTFLOAT32U_FRACTION_BITS + 1
616.is_exp_odd_integer__is_double_limit:
617
618 cmp al, ah
619 jae .is_exp_odd_integer__return_false
620 mov eax, 1
621
622 ; Return.
623.is_exp_odd_integer__return_true:
624 jmp .is_exp_odd_integer__return
625.is_exp_odd_integer__return_false:
626 xor eax, eax
627.is_exp_odd_integer__return:
628 ffreep st0
629 fldenv [xBP - 30h]
630 ret
631
632ENDPROC rtNoCrtMathPowCore
633
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
ContactPrivacy policyTerms of Use