Context Navigation

← Previous Revision
Next Revision →
Normal
Revision Log

sincore.asm

Last change on this file was 98103, checked in by vboxsync, 17 months ago
Copyright year updates by scm.
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 11.2 KB

Rev	Line
[96240]	1	; $Id: sincore.asm 98103 2023-01-17 14:15:46Z vboxsync $
	2	;; @file
	3	; IPRT - No-CRT common sin & cos - AMD64 & X86.
	4	;
	5
	6	;
[98103]	7	; Copyright (C) 2022-2023 Oracle and/or its affiliates.
[96240]	8	;
[96407]	9	; This file is part of VirtualBox base platform packages, as
	10	; available from https://www.virtualbox.org.
[96240]	11	;
[96407]	12	; This program is free software; you can redistribute it and/or
	13	; modify it under the terms of the GNU General Public License
	14	; as published by the Free Software Foundation, in version 3 of the
	15	; License.
	16	;
	17	; This program is distributed in the hope that it will be useful, but
	18	; WITHOUT ANY WARRANTY; without even the implied warranty of
	19	; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	20	; General Public License for more details.
	21	;
	22	; You should have received a copy of the GNU General Public License
	23	; along with this program; if not, see <https://www.gnu.org/licenses>.
	24	;
[96240]	25	; The contents of this file may alternatively be used under the terms
	26	; of the Common Development and Distribution License Version 1.0
[96407]	27	; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
	28	; in the VirtualBox distribution, in which case the provisions of the
[96240]	29	; CDDL are applicable instead of those of the GPL.
	30	;
	31	; You may elect to license modified versions of this file under the
	32	; terms and conditions of either the GPL or the CDDL or both.
	33	;
[96407]	34	; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
	35	;
[96240]	36
	37
	38	%define RT_ASM_WITH_SEH64
	39	%include "iprt/asmdefs.mac"
	40	%include "iprt/x86.mac"
	41
	42
	43	BEGINCODE
	44
	45	;;
	46	; Internal sine and cosine worker that calculates the sine of st0 returning
	47	; it in st0.
	48	;
	49	; When called by a sine function, fabs(st0) >= pi/2.
	50	; When called by a cosine function, fabs(original input value) >= 3pi/8.
	51	;
	52	; That the input isn't a tiny number close to zero, means that we can do a bit
	53	; cruder rounding when operating close to a pi/2 boundrary. The value in the
	54	; ecx register indicates the input precision and controls the crudeness of the
	55	; rounding.
	56	;
	57	; @returns st0 = sine
	58	; @param st0 A finite number to calucate sine of.
	59	; @param ecx Set to 0 if original input was a 32-bit float.
	60	; Set to 1 if original input was a 64-bit double.
	61	; set to 2 if original input was a 80-bit long double.
	62	;
	63	BEGINPROC rtNoCrtMathSinCore
	64	push xBP
	65	SEH64_PUSH_xBP
	66	mov xBP, xSP
	67	SEH64_SET_FRAME_xBP 0
	68	SEH64_END_PROLOGUE
	69
	70	;
	71	; Load the pointer to the rounding crudeness factor into xDX.
	72	;
	73	lea xDX, [.s_ar64NearZero xWrtRIP]
	74	lea xDX, [xDX + xCX * xCB]
	75
	76	;
	77	; Finite number. We want it in the range [0,2pi] and will preform
	78	; a remainder division if it isn't.
	79	;
	80	fcom qword [.s_r64Max xWrtRIP] ; compares st0 and 2*pi
	81	fnstsw ax
	82	test ax, X86_FSW_C3 \| X86_FSW_C0 \| X86_FSW_C2 ; C3 := st0 == mem; C0 := st0 < mem; C2 := unordered (should be the case);
	83	jz .reduce_st0 ; Jump if st0 > mem
	84
	85	fcom qword [.s_r64Min xWrtRIP] ; compares st0 and 0.0
	86	fnstsw ax
	87	test ax, X86_FSW_C3 \| X86_FSW_C0
	88	jnz .reduce_st0 ; Jump if st0 <= mem
	89
	90	;
	91	; We get here if st0 is in the [0,2pi] range.
	92	;
	93	; Now, FSIN is documented to be reasonably accurate for the range
	94	; -3pi/4 to +3pi/4, so we have to make some more effort to calculate
	95	; in that range only.
	96	;
	97	.in_range:
	98	; if (st0 < pi)
	99	fldpi
	100	fcom st1 ; compares st0 (pi) with st1 (the normalized value)
	101	fnstsw ax
	102	test ax, X86_FSW_C0 ; st1 > pi
	103	jnz .larger_than_pi
	104	test ax, X86_FSW_C3
	105	jnz .equals_pi
	106
	107	;
	108	; input in the range [0,pi[
	109	;
	110	.smaller_than_pi:
	111	fdiv qword [.s_r64Two xWrtRIP] ; st0 = pi/2
	112
	113	; if (st0 < pi/2)
	114	fcom st1 ; compares st0 (pi/2) with st1
	115	fnstsw ax
	116	test ax, X86_FSW_C0 ; st1 > pi
	117	jnz .between_half_pi_and_pi
	118	test ax, X86_FSW_C3
	119	jnz .equals_half_pi
	120
	121	;
	122	; The value is between zero and half pi, including the zero value.
	123	;
	124	; This is in range where FSIN works reasonably reliably. So drop the
	125	; half pi in st0 and do the calculation.
	126	;
	127	.between_zero_and_half_pi:
	128	; Check if we're so close to pi/2 that it makes no difference.
	129	fsub st0, st1 ; st0 = pi/2 - st1
	130	fcom qword [xDX]
	131	fnstsw ax
	132	test ax, X86_FSW_C0 \| X86_FSW_C3 ; st0 <= very small positive number.
	133	jnz .equals_half_pi
	134	ffreep st0
	135
	136	; Check if we're so close to zero that it makes no difference given the
	137	; internal accuracy of the FPU.
	138	fcom qword [xDX]
	139	fnstsw ax
	140	test ax, X86_FSW_C0 \| X86_FSW_C3 ; st0 <= very small positive number.
	141	jnz .equals_zero_popped_one
	142
	143	; Ok, calculate sine.
	144	fsin
	145	jmp .return
	146
	147	;
	148	; The value is in the range ]pi/2,pi[
	149	;
	150	; This is outside the comfortable FSIN range, but if we subtract PI and
	151	; move to the ]-pi/2,0[ range we just have to change the sign to get
	152	; the value we want.
	153	;
	154	.between_half_pi_and_pi:
	155	; Check if we're so close to pi/2 that it makes no difference.
	156	fsubr st0, st1 ; st0 = st1 - st0
	157	fcom qword [xDX]
	158	fnstsw ax
	159	test ax, X86_FSW_C0 \| X86_FSW_C3 ; st0 <= very small positive number.
	160	jnz .equals_half_pi
	161	ffreep st0
	162
	163	; Check if we're so close to pi that it makes no difference.
	164	fldpi
	165	fsub st0, st1 ; st0 = st0 - st1
	166	fcom qword [xDX]
	167	fnstsw ax
	168	test ax, X86_FSW_C0 \| X86_FSW_C3 ; st0 <= very small positive number.
	169	jnz .equals_pi
	170	ffreep st0
	171
	172	; Ok, transform the value and calculate sine.
	173	fldpi
	174	fsubp st1, st0
	175
	176	fsin
	177	fchs
	178	jmp .return
	179
	180	;
	181	; input in the range ]pi,2pi[
	182	;
	183	.larger_than_pi:
	184	fsub st1, st0 ; st1 -= pi
	185	fdiv qword [.s_r64Two xWrtRIP] ; st0 = pi/2
	186
	187	; if (st0 < pi/2)
	188	fcom st1 ; compares st0 (pi/2) with reduced st1
	189	fnstsw ax
	190	test ax, X86_FSW_C0 ; st1 > pi
	191	jnz .between_3_half_pi_and_2pi
	192	test ax, X86_FSW_C3
	193	jnz .equals_3_half_pi
	194
	195	;
	196	; The value is in the the range: ]pi,3pi/2[
	197	;
	198	; The actual st0 is in the range ]pi,pi/2[ where FSIN is performing okay
	199	; and we can get the desired result by changing the sign (-FSIN).
	200	;
	201	.between_pi_and_3_half_pi:
	202	; Check if we're so close to pi/2 that it makes no difference.
	203	fsub st0, st1 ; st0 = pi/2 - st1
	204	fcom qword [xDX]
	205	fnstsw ax
	206	test ax, X86_FSW_C0 \| X86_FSW_C3 ; st0 <= very small positive number.
	207	jnz .equals_3_half_pi
	208	ffreep st0
	209
	210	; Check if we're so close to zero that it makes no difference given the
	211	; internal accuracy of the FPU.
	212	fcom qword [xDX]
	213	fnstsw ax
	214	test ax, X86_FSW_C0 \| X86_FSW_C3 ; st0 <= very small positive number.
	215	jnz .equals_pi_popped
	216
	217	; Ok, calculate sine and flip the sign.
	218	fsin
	219	fchs
	220	jmp .return
	221
	222	;
	223	; The value is in the last pi/2 of the range: ]3pi/2,2pi[
	224	;
	225	; Since FSIN should work reasonably well for ]-pi/2,pi], we can just
	226	; subtract pi again (we subtracted pi at .larger_than_pi above) and
	227	; run FSIN on it. (st1 is currently in the range ]pi/2,pi[.)
	228	;
	229	.between_3_half_pi_and_2pi:
	230	; Check if we're so close to pi/2 that it makes no difference.
	231	fsubr st0, st1 ; st0 = st1 - st0
	232	fcom qword [xDX]
	233	fnstsw ax
	234	test ax, X86_FSW_C0 \| X86_FSW_C3 ; st0 <= very small positive number.
	235	jnz .equals_3_half_pi
	236	ffreep st0
	237
	238	; Check if we're so close to pi that it makes no difference.
	239	fldpi
	240	fsub st0, st1 ; st0 = st0 - st1
	241	fcom qword [xDX]
	242	fnstsw ax
	243	test ax, X86_FSW_C0 \| X86_FSW_C3 ; st0 <= very small positive number.
	244	jnz .equals_2pi
	245	ffreep st0
	246
	247	; Ok, adjust input and calculate sine.
	248	fldpi
	249	fsubp st1, st0
	250	fsin
	251	jmp .return
	252
	253	;
	254	; sin(0) = 0
	255	; sin(pi) = 0
	256	;
	257	.equals_zero:
	258	.equals_pi:
	259	.equals_2pi:
	260	ffreep st0
	261	.equals_zero_popped_one:
	262	.equals_pi_popped:
	263	ffreep st0
	264	fldz
	265	jmp .return
	266
	267	;
	268	; sin(pi/2) = 1
	269	;
	270	.equals_half_pi:
	271	ffreep st0
	272	ffreep st0
	273	fld1
	274	jmp .return
	275
	276	;
	277	; sin(3*pi/2) = -1
	278	;
	279	.equals_3_half_pi:
	280	ffreep st0
	281	ffreep st0
	282	fld1
	283	fchs
	284	jmp .return
	285
	286	;
	287	; Return.
	288	;
	289	.return:
	290	leave
	291	ret
	292
	293	;
	294	; Reduce st0 by reminder division by PI*2. The result should be positive here.
	295	;
	296	;; @todo this is one of our weak spots (really any calculation involving PI is).
	297	.reduce_st0:
	298	fldpi
	299	fadd st0, st0
	300	fxch st1 ; st0=input (dividend) st1=2pi (divisor)
	301	.again:
	302	fprem1
	303	fnstsw ax
	304	test ah, (X86_FSW_C2 >> 8) ; C2 is set if partial result.
	305	jnz .again ; Loop till C2 == 0 and we have a final result.
	306
	307	;
	308	; Make sure the result is positive.
	309	;
	310	fxam
	311	fnstsw ax
	312	test ax, X86_FSW_C1 ; The sign bit
	313	jz .reduced_to_positive
	314
	315	fadd st0, st1 ; st0 += 2pi, which should make it positive
	316
	317	%ifdef RT_STRICT
	318	fxam
	319	fnstsw ax
	320	test ax, X86_FSW_C1
	321	jz .reduced_to_positive
	322	int3
	323	%endif
	324
	325	.reduced_to_positive:
	326	fstp st1 ; Get rid of the 2pi value.
	327	jmp .in_range
	328
	329	ALIGNCODE(8)
	330	.s_r64Max:
	331	dq +6.28318530717958647692 ; 2*pi
	332	.s_r64Min:
	333	dq 0.0
	334	.s_r64Two:
	335	dq 2.0
	336	;;
	337	; Close to 2/pi rounding limits for 32-bit, 64-bit and 80-bit floating point operations.
	338	; Given that the original input is at least +/-3pi/8 (1.178) and that precision of the
	339	; PI constant used during reduction/whatever, I think we can round to a whole pi/2
	340	; step when we get close enough.
	341	;
	342	; Look to RTFLOAT64U for the format details, but 52 is the shift for the exponent field
	343	; and 1023 is the exponent bias. Since the format uses an implied 1 in the mantissa,
	344	; we only have to set the exponent to get a valid number.
	345	;
	346	.s_ar64NearZero:
	347	;; @todo check how sensible these really are...
	348	dq (-18 + 1023) << 52 ; float / 32-bit / single precision input
	349	dq (-40 + 1023) << 52 ; double / 64-bit / double precision input
	350	dq (-52 + 1023) << 52 ; long double / 80-bit / extended precision input
	351	ENDPROC rtNoCrtMathSinCore
	352

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: vbox/trunk/src/VBox/Runtime/common/math/sincore.asm

Download in other formats: