VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@ 96407

Last change on this file since 96407 was 96407, checked in by vboxsync, 3 years ago

scm copyright and license note update

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 255.7 KB
Line 
1/* $Id: tstIEMAImpl.cpp 96407 2022-08-22 17:43:14Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#include "../include/IEMInternal.h"
33
34#include <iprt/errcore.h>
35#include <VBox/log.h>
36#include <iprt/assert.h>
37#include <iprt/ctype.h>
38#include <iprt/getopt.h>
39#include <iprt/initterm.h>
40#include <iprt/message.h>
41#include <iprt/mp.h>
42#include <iprt/rand.h>
43#include <iprt/stream.h>
44#include <iprt/string.h>
45#include <iprt/test.h>
46
47#include "tstIEMAImpl.h"
48
49
50/*********************************************************************************************************************************
51* Defined Constants And Macros *
52*********************************************************************************************************************************/
53#define ENTRY(a_Name) ENTRY_EX(a_Name, 0)
54#define ENTRY_EX(a_Name, a_uExtra) \
55 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
56 g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
57 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
58
59#define ENTRY_BIN(a_Name) ENTRY_EX_BIN(a_Name, 0)
60#define ENTRY_EX_BIN(a_Name, a_uExtra) \
61 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
62 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
63 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
64
65#define ENTRY_INTEL(a_Name, a_fEflUndef) ENTRY_INTEL_EX(a_Name, a_fEflUndef, 0)
66#define ENTRY_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
67 { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
68 g_aTests_ ## a_Name ## _intel, &g_cTests_ ## a_Name ## _intel, \
69 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }
70
71#define ENTRY_AMD(a_Name, a_fEflUndef) ENTRY_AMD_EX(a_Name, a_fEflUndef, 0)
72#define ENTRY_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
73 { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
74 g_aTests_ ## a_Name ## _amd, &g_cTests_ ## a_Name ## _amd, \
75 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }
76
77#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
78 typedef struct a_TypeName \
79 { \
80 const char *pszName; \
81 a_FunctionPtrType pfn; \
82 a_FunctionPtrType pfnNative; \
83 a_TestType const *paTests; \
84 uint32_t const *pcTests; \
85 uint32_t uExtra; \
86 uint8_t idxCpuEflFlavour; \
87 } a_TypeName
88
89#define COUNT_VARIATIONS(a_SubTest) \
90 (1 + ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) )
91
92
93/*********************************************************************************************************************************
94* Global Variables *
95*********************************************************************************************************************************/
96static RTTEST g_hTest;
97static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
98#ifdef TSTIEMAIMPL_WITH_GENERATOR
99static uint32_t g_cZeroDstTests = 2;
100static uint32_t g_cZeroSrcTests = 4;
101#endif
102static uint8_t *g_pu8, *g_pu8Two;
103static uint16_t *g_pu16, *g_pu16Two;
104static uint32_t *g_pu32, *g_pu32Two, *g_pfEfl;
105static uint64_t *g_pu64, *g_pu64Two;
106static RTUINT128U *g_pu128, *g_pu128Two;
107
108static char g_aszBuf[32][256];
109static unsigned g_idxBuf = 0;
110
111static uint32_t g_cIncludeTestPatterns;
112static uint32_t g_cExcludeTestPatterns;
113static const char *g_apszIncludeTestPatterns[64];
114static const char *g_apszExcludeTestPatterns[64];
115
116static unsigned g_cVerbosity = 0;
117
118
119/*********************************************************************************************************************************
120* Internal Functions *
121*********************************************************************************************************************************/
122static const char *FormatR80(PCRTFLOAT80U pr80);
123static const char *FormatR64(PCRTFLOAT64U pr64);
124static const char *FormatR32(PCRTFLOAT32U pr32);
125
126
127/*
128 * Random helpers.
129 */
130
131static uint32_t RandEFlags(void)
132{
133 uint32_t fEfl = RTRandU32();
134 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
135}
136
137#ifdef TSTIEMAIMPL_WITH_GENERATOR
138
139static uint8_t RandU8(void)
140{
141 return RTRandU32Ex(0, 0xff);
142}
143
144
145static uint16_t RandU16(void)
146{
147 return RTRandU32Ex(0, 0xffff);
148}
149
150
151static uint32_t RandU32(void)
152{
153 return RTRandU32();
154}
155
156#endif
157
158static uint64_t RandU64(void)
159{
160 return RTRandU64();
161}
162
163
164static RTUINT128U RandU128(void)
165{
166 RTUINT128U Ret;
167 Ret.s.Hi = RTRandU64();
168 Ret.s.Lo = RTRandU64();
169 return Ret;
170}
171
172#ifdef TSTIEMAIMPL_WITH_GENERATOR
173
174static uint8_t RandU8Dst(uint32_t iTest)
175{
176 if (iTest < g_cZeroDstTests)
177 return 0;
178 return RandU8();
179}
180
181
182static uint8_t RandU8Src(uint32_t iTest)
183{
184 if (iTest < g_cZeroSrcTests)
185 return 0;
186 return RandU8();
187}
188
189
190static uint16_t RandU16Dst(uint32_t iTest)
191{
192 if (iTest < g_cZeroDstTests)
193 return 0;
194 return RandU16();
195}
196
197
198static uint16_t RandU16Src(uint32_t iTest)
199{
200 if (iTest < g_cZeroSrcTests)
201 return 0;
202 return RandU16();
203}
204
205
206static uint32_t RandU32Dst(uint32_t iTest)
207{
208 if (iTest < g_cZeroDstTests)
209 return 0;
210 return RandU32();
211}
212
213
214static uint32_t RandU32Src(uint32_t iTest)
215{
216 if (iTest < g_cZeroSrcTests)
217 return 0;
218 return RandU32();
219}
220
221
222static uint64_t RandU64Dst(uint32_t iTest)
223{
224 if (iTest < g_cZeroDstTests)
225 return 0;
226 return RandU64();
227}
228
229
230static uint64_t RandU64Src(uint32_t iTest)
231{
232 if (iTest < g_cZeroSrcTests)
233 return 0;
234 return RandU64();
235}
236
237
238/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
239static int16_t RandI16Src2(uint32_t iTest)
240{
241 if (iTest < 18 * 4)
242 switch (iTest % 4)
243 {
244 case 0: return 0;
245 case 1: return INT16_MAX;
246 case 2: return INT16_MIN;
247 case 3: break;
248 }
249 return (int16_t)RandU16();
250}
251
252
253/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
254static int32_t RandI32Src2(uint32_t iTest)
255{
256 if (iTest < 18 * 4)
257 switch (iTest % 4)
258 {
259 case 0: return 0;
260 case 1: return INT32_MAX;
261 case 2: return INT32_MIN;
262 case 3: break;
263 }
264 return (int32_t)RandU32();
265}
266
267
#if 0
/** Random 64-bit integer source operand.
 * NOTE(review): preprocessed out; no caller visible in this file. */
static int64_t RandI64Src(uint32_t iTest)
{
    RT_NOREF(iTest);
    return (int64_t)RandU64();
}
#endif
275
276
277static uint16_t RandFcw(void)
278{
279 return RandU16() & ~X86_FCW_ZERO_MASK;
280}
281
282
283static uint16_t RandFsw(void)
284{
285 AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
286 return RandU16();
287}
288
289
290static uint32_t RandMxcsr(void)
291{
292 return RandU32() & ~X86_MXCSR_ZERO_MASK;
293}
294
295
296static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
297{
298 if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
299 pr80->sj64.uFraction >>= cShift;
300 else
301 pr80->sj64.uFraction = (cShift % 19) + 1;
302}
303
304
305
/**
 * Produces an 80-bit floating point test value of a specific basic category.
 *
 * @returns The generated value (the random sign bit always survives).
 * @param   bType       Category selector, masked to 0..31:
 *                      0=zero, 1=pseudo-infinity, 2=infinity, 3=indefinite,
 *                      4/5=denormal, 6/7=pseudo-denormal, 8/9=pseudo-NaN,
 *                      10/11=quiet NaN, 12/13=signalling NaN, 14/15=unnormal,
 *                      16..25=normal (16 forces an all-ones fraction to poke at
 *                      rounding), 26..31 return the raw random bits unmodified.
 * @param   cTarget     Width the caller will convert the value to (80/64/32,
 *                      or 16/32/59/64 when fIntTarget); bounds normal exponents.
 * @param   fIntTarget  Whether the conversion target is an integer format.
 */
static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
{
    Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));

    /* Start from fully random bits, then constrain per category below. */
    RTFLOAT80U r80;
    r80.au64[0] = RandU64();
    r80.au16[4] = RandU16();

    /*
     * Adjust the random stuff according to bType.
     */
    bType &= 0x1f;
    if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
    {
        /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
        r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
        r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
        r80.sj64.fInteger = bType >= 2 ? 1 : 0;
        AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
        Assert( bType != 1 || RTFLOAT80U_IS_387_INVALID(&r80));
        AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* Denormals (4,5) and Pseudo denormals (6,7); odd types shift the fraction instead of regenerating it. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && bType < 6)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0;
        r80.sj64.fInteger = bType >= 6;
        AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(bType < 6 || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
    }
    else if (bType == 8 || bType == 9)
    {
        /* Pseudo NaN: max exponent with the integer bit clear. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        if (r80.sj64.fInteger)
            r80.sj64.uFraction |= RT_BIT_64(62);
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62);
        r80.sj64.fInteger = 0;
        AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType == 10 || bType == 11 || bType == 12 || bType == 13)
    {
        /* Quiet (10,11) and signalling (12,13) NaNs; bit 62 selects quiet vs signalling. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        if (bType < 12)
            r80.sj64.uFraction |= RT_BIT_64(62); /* quiet */
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
        r80.sj64.fInteger = 1;
        AssertMsg(bType >= 12 || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType < 12 || RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 14 || bType == 15)
    {
        /* Unnormals: non-boundary exponent with the integer bit clear. */
        if (bType & 1)
            SafeR80FractionShift(&r80, RandU8() % 62);
        r80.sj64.fInteger = 0;
        if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
            r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
        AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType < 26)
    {
        /* Make sure we have lots of normalized values. */
        if (!fIntTarget)
        {
            /* Clamp the exponent so it stays representable in the (float) target format. */
            const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
                                   : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
            const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
                                   : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent <= uMinExp)
                r80.sj64.uExponent = uMinExp + 1;
            else if (r80.sj64.uExponent >= uMaxExp)
                r80.sj64.uExponent = uMaxExp - 1;

            if (bType == 16)
            { /* All 1s is useful to testing rounding. Also try trigger special
                 behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp - 1;
                else if ((bExp & 3) == 1)
                    r80.sj64.uExponent = uMinExp + 1;
                else if ((bExp & 3) == 2)
                    r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
            }
        }
        else
        {
            /* integer target: */
            const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
            const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent < uMinExp)
                r80.sj64.uExponent = uMinExp;
            else if (r80.sj64.uExponent > uMaxExp)
                r80.sj64.uExponent = uMaxExp;

            if (bType == 16)
            { /* All 1s is useful to testing rounding. Also try trigger special
                 behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp;
                else if ((bExp & 3) == 1)
                    r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
            }
        }

        AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
    }
    /* bType 26..31: the raw random bit pattern is returned unmodified. */
    return r80;
}
444
445
446static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
447{
448 /*
449 * Make it more likely that we get a good selection of special values.
450 */
451 return RandR80Ex(RandU8(), cTarget, fIntTarget);
452
453}
454
455
456static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
457{
458 /* Make sure we cover all the basic types first before going for random selection: */
459 if (iTest <= 18)
460 return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
461 return RandR80(cTarget, fIntTarget);
462}
463
464
465/**
466 * Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
467 * to a 0..17, covering all basic value types.
468 */
469static uint8_t RandR80Src12RemapType(uint8_t bType)
470{
471 switch (bType)
472 {
473 case 0: return 18; /* normal */
474 case 1: return 16; /* normal extreme rounding */
475 case 2: return 14; /* unnormal */
476 case 3: return 12; /* Signalling NaN */
477 case 4: return 10; /* Quiet NaN */
478 case 5: return 8; /* PseudoNaN */
479 case 6: return 6; /* Pseudo Denormal */
480 case 7: return 4; /* Denormal */
481 case 8: return 3; /* Indefinite */
482 case 9: return 2; /* Infinity */
483 case 10: return 1; /* Pseudo-Infinity */
484 case 11: return 0; /* Zero */
485 default: AssertFailedReturn(18);
486 }
487}
488
489
/**
 * This works in tandem with RandR80Src2 to make sure we cover all operand
 * type mixes first before we venture into regular random testing.
 *
 * There are 11 basic variations, when we leave out the five odd ones using
 * SafeR80FractionShift.  Because of the special normalized value targeting at
 * rounding, we make it an even 12.  So 144 combinations for two operands.
 *
 * @param   iTest         Test index; below the per-pairing threshold this picks
 *                        a deterministic type combination, above it random.
 * @param   cPartnerBits  Width of the second operand (80, 64 or 32).
 * @param   fPartnerInt   Whether the second operand is an integer.
 */
static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
{
    if (cPartnerBits == 80)
    {
        Assert(!fPartnerInt);
        if (iTest < 12 * 12) /* 12x12 matrix together with RandR80Src2. */
            return RandR80Ex(RandR80Src12RemapType(iTest / 12));
    }
    else if ((cPartnerBits == 64 || cPartnerBits == 32) && !fPartnerInt)
    {
        if (iTest < 12 * 10) /* 12x10 matrix together with RandR64Src2/RandR32Src2. */
            return RandR80Ex(RandR80Src12RemapType(iTest / 10));
    }
    else if (iTest < 18 * 4 && fPartnerInt) /* 18x4 matrix together with RandI16Src2/RandI32Src2. */
        return RandR80Ex(iTest / 4);
    return RandR80();
}
515
516
517/** Partner to RandR80Src1. */
518static RTFLOAT80U RandR80Src2(uint32_t iTest)
519{
520 if (iTest < 12 * 12)
521 return RandR80Ex(RandR80Src12RemapType(iTest % 12));
522 return RandR80();
523}
524
525
526static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
527{
528 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
529 pr64->s64.uFraction >>= cShift;
530 else
531 pr64->s64.uFraction = (cShift % 19) + 1;
532}
533
534
/**
 * Produces a 64-bit floating point test value of a specific basic category.
 *
 * @param   bType   Category selector, masked to 0..15: 0=zero, 1=infinity,
 *                  2/3=subnormal, 4/5=quiet NaN, 6/7=signalling NaN,
 *                  8..11=forced normal, 12..15=raw random bits.
 */
static RTFLOAT64U RandR64Ex(uint8_t bType)
{
    RTFLOAT64U r64;
    r64.u = RandU64();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    bType &= 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r64.s.uExponent = bType == 0 ? 0 : 0x7ff;
        r64.s.uFractionHigh = 0;
        r64.s.uFractionLow = 0;
        AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals: ensure a non-zero fraction, zero the exponent. */
        if (bType == 3)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0;
        AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* NaNs: the top fraction bit distinguishes quiet from signalling. */
        if (bType & 1)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0x7ff;
        if (bType < 6)
            r64.s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
        else
            r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
        AssertMsg(bType >= 6 || RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType < 6 || RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r64.s.uExponent == 0)
            r64.s.uExponent = 1;
        else if (r64.s.uExponent == 0x7ff)
            r64.s.uExponent = 0x7fe;
        AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    /* bType 12..15: raw random pattern returned unmodified. */
    return r64;
}
591
592
593static RTFLOAT64U RandR64Src(uint32_t iTest)
594{
595 if (iTest < 16)
596 return RandR64Ex(iTest);
597 return RandR64Ex(RandU8());
598}
599
600
601/** Pairing with a 80-bit floating point arg. */
602static RTFLOAT64U RandR64Src2(uint32_t iTest)
603{
604 if (iTest < 12 * 10)
605 return RandR64Ex(9 - iTest % 10); /* start with normal values */
606 return RandR64Ex(RandU8());
607}
608
609
610static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
611{
612 if (pr32->s.uFraction >= RT_BIT_32(cShift))
613 pr32->s.uFraction >>= cShift;
614 else
615 pr32->s.uFraction = (cShift % 19) + 1;
616}
617
618
/**
 * Produces a 32-bit floating point test value of a specific basic category.
 *
 * @param   bType   Category selector, masked to 0..15: 0=zero, 1=infinity,
 *                  2/3=subnormal, 4/5=quiet NaN, 6/7=signalling NaN,
 *                  8..11=forced normal, 12..15=raw random bits.
 */
static RTFLOAT32U RandR32Ex(uint8_t bType)
{
    RTFLOAT32U r32;
    r32.u = RandU32();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    bType &= 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r32.s.uExponent = bType == 0 ? 0 : 0xff;
        r32.s.uFraction = 0;
        AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
        AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals: ensure a non-zero fraction, zero the exponent. */
        if (bType == 3)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0;
        AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* NaNs: the top fraction bit distinguishes quiet from signalling. */
        if (bType & 1)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0xff;
        if (bType < 6)
            r32.s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
        else
            r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
        AssertMsg(bType >= 6 || RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
        AssertMsg(bType < 6 || RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
        AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r32.s.uExponent == 0)
            r32.s.uExponent = 1;
        else if (r32.s.uExponent == 0xff)
            r32.s.uExponent = 0xfe;
        AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    /* bType 12..15: raw random pattern returned unmodified. */
    return r32;
}
674
675
676static RTFLOAT32U RandR32Src(uint32_t iTest)
677{
678 if (iTest < 16)
679 return RandR32Ex(iTest);
680 return RandR32Ex(RandU8());
681}
682
683
684/** Pairing with a 80-bit floating point arg. */
685static RTFLOAT32U RandR32Src2(uint32_t iTest)
686{
687 if (iTest < 12 * 10)
688 return RandR32Ex(9 - iTest % 10); /* start with normal values */
689 return RandR32Ex(RandU8());
690}
691
692
/**
 * Produces a packed BCD (x87 FBLD/FBSTP format) test value.
 *
 * Tests 0..2 yield zeros with alternating sign, tests 3..4 the indefinite
 * encoding.  Thereafter (iTest & 7) >= 6 produces deliberately illegal
 * encodings (random digit bytes, and non-zero padding when (iTest & 7) == 7),
 * the rest valid BCD digit pairs.
 */
static RTPBCD80U RandD80Src(uint32_t iTest)
{
    if (iTest < 3)
    {
        RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
        return d80Zero;
    }
    if (iTest < 5)
    {
        RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
        return d80Ind;
    }

    RTPBCD80U d80;
    uint8_t b = RandU8();
    d80.s.fSign = b & 1;

    if ((iTest & 7) >= 6)
    {
        /* Illegal: raw random bytes need not be valid BCD digit pairs. */
        d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
            d80.s.abPairs[iPair] = RandU8();
    }
    else
    {
        /* Normal: each nibble is a decimal digit 0..9. */
        d80.s.uPad = 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
        {
            uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
            uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
            d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
        }
    }
    return d80;
}
730
731
/**
 * Formats an 80-bit float as C source (an RTFLOAT80U_INIT_* expression).
 *
 * @returns A string literal for common special values, otherwise a pointer
 *          into the rotating g_aszBuf pool (valid until the pool wraps).
 */
const char *GenFormatR80(PCRTFLOAT80U plrd)
{
    if (RTFLOAT80U_IS_ZERO(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
    if (RTFLOAT80U_IS_INF(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
    if (RTFLOAT80U_IS_INDEFINITE(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
    /* NOTE(review): the low-bits==1 payload check appears to match the default
       payload of the INIT_QNAN/INIT_SNAN macros - confirm against iprt/types.h. */
    if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
    if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";

    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
                plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
    return pszBuf;
}
750
751const char *GenFormatR64(PCRTFLOAT64U prd)
752{
753 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
754 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
755 prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
756 return pszBuf;
757}
758
759
760const char *GenFormatR32(PCRTFLOAT32U pr)
761{
762 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
763 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
764 return pszBuf;
765}
766
767
/**
 * Formats a packed BCD value as a RTPBCD80U_INIT_C()/RTPBCD80U_INIT_EX_C()
 * source expression, returning a pointer into the rotating g_aszBuf pool.
 * The EX variant is used when the (normally zero) padding bits are set.
 */
const char *GenFormatD80(PCRTPBCD80U pd80)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off;
    if (pd80->s.uPad == 0)
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
    else
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
    /* Digit pairs are emitted most significant first. */
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    while (iPair-- > 0)
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
                           RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
                           RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
    /* Worst case is well below the 256 byte buffer, so no bounds check here. */
    pszBuf[off++] = ')';
    pszBuf[off++] = '\0';
    return pszBuf;
}
785
786
787const char *GenFormatI64(int64_t i64)
788{
789 if (i64 == INT64_MIN) /* This one is problematic */
790 return "INT64_MIN";
791 if (i64 == INT64_MAX)
792 return "INT64_MAX";
793 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
794 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
795 return pszBuf;
796}
797
798
799const char *GenFormatI64(int64_t const *pi64)
800{
801 return GenFormatI64(*pi64);
802}
803
804
805const char *GenFormatI32(int32_t i32)
806{
807 if (i32 == INT32_MIN) /* This one is problematic */
808 return "INT32_MIN";
809 if (i32 == INT32_MAX)
810 return "INT32_MAX";
811 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
812 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
813 return pszBuf;
814}
815
816
817const char *GenFormatI32(int32_t const *pi32)
818{
819 return GenFormatI32(*pi32);
820}
821
822
823const char *GenFormatI16(int16_t i16)
824{
825 if (i16 == INT16_MIN) /* This one is problematic */
826 return "INT16_MIN";
827 if (i16 == INT16_MAX)
828 return "INT16_MAX";
829 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
830 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
831 return pszBuf;
832}
833
834
835const char *GenFormatI16(int16_t const *pi16)
836{
837 return GenFormatI16(*pi16);
838}
839
840
/**
 * Writes the standard file header comment for a generated test data file.
 *
 * @param   pOut        Output stream.
 * @param   pszCpuDesc  CPU description embedded in the file docstring.
 * @param   pszCpuType  Optional CPU type tag (NULL to omit).
 */
static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
{
    /* We want to tag the generated source code with the revision that produced it. */
    static char s_szRev[] = "$Revision: 96407 $";
    const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
    size_t cchRev = 0;
    while (RT_C_IS_DIGIT(pszRev[cchRev])) /* take only the leading digits of the expanded keyword */
        cchRev++;

    RTStrmPrintf(pOut,
                 "/* $Id: tstIEMAImpl.cpp 96407 2022-08-22 17:43:14Z vboxsync $ */\n"
                 "/** @file\n"
                 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
                 " */\n"
                 "\n"
                 "/*\n"
                 " * Copyright (C) 2022 Oracle Corporation\n"
                 " *\n"
                 " * This file is part of VirtualBox Open Source Edition (OSE), as\n"
                 " * available from http://www.virtualbox.org. This file is free software;\n"
                 " * you can redistribute it and/or modify it under the terms of the GNU\n"
                 " * General Public License (GPL) as published by the Free Software\n"
                 " * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
                 " * VirtualBox OSE distribution. VirtualBox OSE is distributed in the\n"
                 " * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
                 " */\n"
                 "\n"
                 "#include \"tstIEMAImpl.h\"\n"
                 "\n"
                 ,
                 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
}
873
874
875static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
876{
877 PRTSTREAM pOut = NULL;
878 int rc = RTStrmOpen(pszFilename, "w", &pOut);
879 if (RT_SUCCESS(rc))
880 {
881 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
882 return pOut;
883 }
884 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
885 return NULL;
886}
887
888
889static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
890{
891 RTStrmPrintf(pOut,
892 "\n"
893 "/* end of file */\n");
894 int rc = RTStrmClose(pOut);
895 if (RT_SUCCESS(rc))
896 return rcExit;
897 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
898}
899
900
901static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
902{
903 RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
904}
905
906
907static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
908{
909 RTStrmPrintf(pOut,
910 "};\n"
911 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
912 "\n",
913 pszName, pszName);
914}
915
916#endif /* TSTIEMAIMPL_WITH_GENERATOR */
917
918
919/*
920 * Test helpers.
921 */
922static bool IsTestEnabled(const char *pszName)
923{
924 /* Process excludes first: */
925 uint32_t i = g_cExcludeTestPatterns;
926 while (i-- > 0)
927 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
928 return false;
929
930 /* If no include patterns, everything is included: */
931 i = g_cIncludeTestPatterns;
932 if (!i)
933 return true;
934
935 /* Otherwise only tests in the include patters gets tested: */
936 while (i-- > 0)
937 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
938 return true;
939
940 return false;
941}
942
943
944static bool SubTestAndCheckIfEnabled(const char *pszName)
945{
946 RTTestSub(g_hTest, pszName);
947 if (IsTestEnabled(pszName))
948 return true;
949 RTTestSkipped(g_hTest, g_cVerbosity > 0 ? "excluded" : NULL);
950 return false;
951}
952
953
954static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
955{
956 if (fActual == fExpected)
957 return "";
958
959 uint32_t const fXor = fActual ^ fExpected;
960 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
961 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
962
963 static struct
964 {
965 const char *pszName;
966 uint32_t fFlag;
967 } const s_aFlags[] =
968 {
969#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
970 EFL_ENTRY(CF),
971 EFL_ENTRY(PF),
972 EFL_ENTRY(AF),
973 EFL_ENTRY(ZF),
974 EFL_ENTRY(SF),
975 EFL_ENTRY(TF),
976 EFL_ENTRY(IF),
977 EFL_ENTRY(DF),
978 EFL_ENTRY(OF),
979 EFL_ENTRY(IOPL),
980 EFL_ENTRY(NT),
981 EFL_ENTRY(RF),
982 EFL_ENTRY(VM),
983 EFL_ENTRY(AC),
984 EFL_ENTRY(VIF),
985 EFL_ENTRY(VIP),
986 EFL_ENTRY(ID),
987 };
988 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
989 if (s_aFlags[i].fFlag & fXor)
990 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
991 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
992 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
993 return pszBuf;
994}
995
996
997static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
998{
999 if (fActual == fExpected)
1000 return "";
1001
1002 uint16_t const fXor = fActual ^ fExpected;
1003 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1004 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1005
1006 static struct
1007 {
1008 const char *pszName;
1009 uint32_t fFlag;
1010 } const s_aFlags[] =
1011 {
1012#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
1013 FSW_ENTRY(IE),
1014 FSW_ENTRY(DE),
1015 FSW_ENTRY(ZE),
1016 FSW_ENTRY(OE),
1017 FSW_ENTRY(UE),
1018 FSW_ENTRY(PE),
1019 FSW_ENTRY(SF),
1020 FSW_ENTRY(ES),
1021 FSW_ENTRY(C0),
1022 FSW_ENTRY(C1),
1023 FSW_ENTRY(C2),
1024 FSW_ENTRY(C3),
1025 FSW_ENTRY(B),
1026 };
1027 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1028 if (s_aFlags[i].fFlag & fXor)
1029 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1030 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1031 if (fXor & X86_FSW_TOP_MASK)
1032 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
1033 X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
1034#if 0 /* For debugging fprem & fprem1 */
1035 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
1036 X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
1037#endif
1038 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1039 return pszBuf;
1040}
1041
1042
1043static const char *MxcsrDiff(uint32_t fActual, uint32_t fExpected)
1044{
1045 if (fActual == fExpected)
1046 return "";
1047
1048 uint16_t const fXor = fActual ^ fExpected;
1049 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1050 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1051
1052 static struct
1053 {
1054 const char *pszName;
1055 uint32_t fFlag;
1056 } const s_aFlags[] =
1057 {
1058#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1059 MXCSR_ENTRY(IE),
1060 MXCSR_ENTRY(DE),
1061 MXCSR_ENTRY(ZE),
1062 MXCSR_ENTRY(OE),
1063 MXCSR_ENTRY(UE),
1064 MXCSR_ENTRY(PE),
1065
1066 MXCSR_ENTRY(IM),
1067 MXCSR_ENTRY(DM),
1068 MXCSR_ENTRY(ZM),
1069 MXCSR_ENTRY(OM),
1070 MXCSR_ENTRY(UM),
1071 MXCSR_ENTRY(PM),
1072
1073 MXCSR_ENTRY(DAZ),
1074 MXCSR_ENTRY(FZ),
1075#undef MXCSR_ENTRY
1076 };
1077 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1078 if (s_aFlags[i].fFlag & fXor)
1079 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1080 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1081 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1082 return pszBuf;
1083}
1084
1085
1086static const char *FormatFcw(uint16_t fFcw)
1087{
1088 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1089
1090 const char *pszPC = NULL; /* (msc+gcc are too stupid) */
1091 switch (fFcw & X86_FCW_PC_MASK)
1092 {
1093 case X86_FCW_PC_24: pszPC = "PC24"; break;
1094 case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
1095 case X86_FCW_PC_53: pszPC = "PC53"; break;
1096 case X86_FCW_PC_64: pszPC = "PC64"; break;
1097 }
1098
1099 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1100 switch (fFcw & X86_FCW_RC_MASK)
1101 {
1102 case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
1103 case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
1104 case X86_FCW_RC_UP: pszRC = "UP"; break;
1105 case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
1106 }
1107 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);
1108
1109 static struct
1110 {
1111 const char *pszName;
1112 uint32_t fFlag;
1113 } const s_aFlags[] =
1114 {
1115#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
1116 FCW_ENTRY(IM),
1117 FCW_ENTRY(DM),
1118 FCW_ENTRY(ZM),
1119 FCW_ENTRY(OM),
1120 FCW_ENTRY(UM),
1121 FCW_ENTRY(PM),
1122 { "6M", 64 },
1123 };
1124 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1125 if (fFcw & s_aFlags[i].fFlag)
1126 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1127
1128 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1129 return pszBuf;
1130}
1131
1132
1133static const char *FormatMxcsr(uint32_t fMxcsr)
1134{
1135 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1136
1137 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1138 switch (fMxcsr & X86_MXCSR_RC_MASK)
1139 {
1140 case X86_MXCSR_RC_NEAREST: pszRC = "NEAR"; break;
1141 case X86_MXCSR_RC_DOWN: pszRC = "DOWN"; break;
1142 case X86_MXCSR_RC_UP: pszRC = "UP"; break;
1143 case X86_MXCSR_RC_ZERO: pszRC = "ZERO"; break;
1144 }
1145
1146 const char *pszDAZ = fMxcsr & X86_MXCSR_DAZ ? " DAZ" : "";
1147 const char *pszFZ = fMxcsr & X86_MXCSR_FZ ? " FZ" : "";
1148 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s%s%s", pszRC, pszDAZ, pszFZ);
1149
1150 static struct
1151 {
1152 const char *pszName;
1153 uint32_t fFlag;
1154 } const s_aFlags[] =
1155 {
1156#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1157 MXCSR_ENTRY(IE),
1158 MXCSR_ENTRY(DE),
1159 MXCSR_ENTRY(ZE),
1160 MXCSR_ENTRY(OE),
1161 MXCSR_ENTRY(UE),
1162 MXCSR_ENTRY(PE),
1163
1164 MXCSR_ENTRY(IM),
1165 MXCSR_ENTRY(DM),
1166 MXCSR_ENTRY(ZM),
1167 MXCSR_ENTRY(OM),
1168 MXCSR_ENTRY(UM),
1169 MXCSR_ENTRY(PM),
1170 { "6M", 64 },
1171 };
1172 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1173 if (fMxcsr & s_aFlags[i].fFlag)
1174 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1175
1176 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1177 return pszBuf;
1178}
1179
1180
1181static const char *FormatR80(PCRTFLOAT80U pr80)
1182{
1183 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1184 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1185 return pszBuf;
1186}
1187
1188
1189static const char *FormatR64(PCRTFLOAT64U pr64)
1190{
1191 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1192 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1193 return pszBuf;
1194}
1195
1196
1197static const char *FormatR32(PCRTFLOAT32U pr32)
1198{
1199 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1200 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1201 return pszBuf;
1202}
1203
1204
1205static const char *FormatD80(PCRTPBCD80U pd80)
1206{
1207 /* There is only one indefinite endcoding (same as for 80-bit
1208 floating point), so get it out of the way first: */
1209 if (RTPBCD80U_IS_INDEFINITE(pd80))
1210 return "Ind";
1211
1212 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1213 size_t off = 0;
1214 pszBuf[off++] = pd80->s.fSign ? '-' : '+';
1215 unsigned cBadDigits = 0;
1216 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
1217 while (iPair-- > 0)
1218 {
1219 static const char s_szDigits[] = "0123456789abcdef";
1220 static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
1221 pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
1222 pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1223 cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
1224 + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1225 }
1226 if (cBadDigits || pd80->s.uPad != 0)
1227 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
1228 pszBuf[off] = '\0';
1229 return pszBuf;
1230}
1231
1232
#if 0
/** Formats a signed 64-bit value as hex into a rotating static buffer.
 *  Currently unused, hence compiled out (see FormatI32/FormatI16 below). */
static const char *FormatI64(int64_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
#endif
1241
1242
1243static const char *FormatI32(int32_t const *piVal)
1244{
1245 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1246 RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1247 return pszBuf;
1248}
1249
1250
1251static const char *FormatI16(int16_t const *piVal)
1252{
1253 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1254 RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1255 return pszBuf;
1256}
1257
1258
/*
 * Binary operations.
 *
 * Each sub-test table entry pairs a worker function with pre-generated test
 * vectors (input/output EFLAGS, destination and source operands).
 */
TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);

#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits BinU<a_cBits>Generate(), which runs the native (or fallback) worker
 * on random inputs and streams the resulting test vectors as C array
 * initializers.  CPU-vendor-specific EFLAGS flavours go to pOutCpu; the
 * rest to pOut.  Skipped entirely when the generator build is disabled.
 */
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
static void BinU ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
    { \
        PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
                                             ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
        \
        GenerateArrayStart(pOutFn, g_aBinU ## a_cBits[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
            if (g_aBinU ## a_cBits[iFn].uExtra) \
                Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
            Test.uMisc = 0; \
            pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOutFn, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOutFn, g_aBinU ## a_cBits[iFn].pszName); \
    } \
}
#else
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
#endif
1305
/**
 * Emits BinU<a_cBits>Test() (plus its generator via GEN_BINARY_TESTS).
 *
 * The test driver replays each pre-generated vector against the worker and
 * compares the resulting destination and EFLAGS; on success it repeats the
 * call via the global g_puNN / g_pfEfl pointers as a second sanity check.
 * A second loop iteration (cVars) re-runs everything with pfnNative.
 */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        PFNIEMAIMPLBINU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
                if ( uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
1346
1347
/*
 * 8-bit binary operations.
 */
/** Sub-test table for the 8-bit two-operand workers (plain and locked
 *  variants share the same pre-generated test vectors). */
static const BINU8_T g_aBinU8[] =
{
    ENTRY(add_u8),
    ENTRY(add_u8_locked),
    ENTRY(adc_u8),
    ENTRY(adc_u8_locked),
    ENTRY(sub_u8),
    ENTRY(sub_u8_locked),
    ENTRY(sbb_u8),
    ENTRY(sbb_u8_locked),
    ENTRY(or_u8),
    ENTRY(or_u8_locked),
    ENTRY(xor_u8),
    ENTRY(xor_u8_locked),
    ENTRY(and_u8),
    ENTRY(and_u8_locked),
    ENTRY(cmp_u8),
    ENTRY(test_u8),
};
TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1371
1372
/*
 * 16-bit binary operations.
 */
/** Sub-test table for the 16-bit two-operand workers.  The ENTRY_EX uExtra=1
 *  marks bit-test instructions whose source (bit index) must be masked to
 *  the operand width by the generator; ENTRY_AMD/ENTRY_INTEL carry the
 *  EFLAGS bits whose behaviour differs between CPU vendors. */
static const BINU16_T g_aBinU16[] =
{
    ENTRY(add_u16),
    ENTRY(add_u16_locked),
    ENTRY(adc_u16),
    ENTRY(adc_u16_locked),
    ENTRY(sub_u16),
    ENTRY(sub_u16_locked),
    ENTRY(sbb_u16),
    ENTRY(sbb_u16_locked),
    ENTRY(or_u16),
    ENTRY(or_u16_locked),
    ENTRY(xor_u16),
    ENTRY(xor_u16_locked),
    ENTRY(and_u16),
    ENTRY(and_u16_locked),
    ENTRY(cmp_u16),
    ENTRY(test_u16),
    ENTRY_EX(bt_u16, 1),
    ENTRY_EX(btc_u16, 1),
    ENTRY_EX(btc_u16_locked, 1),
    ENTRY_EX(btr_u16, 1),
    ENTRY_EX(btr_u16_locked, 1),
    ENTRY_EX(bts_u16, 1),
    ENTRY_EX(bts_u16_locked, 1),
    ENTRY_AMD( bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY(arpl),
};
TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1410
1411
/*
 * 32-bit binary operations.
 */
/** Sub-test table for the 32-bit two-operand workers; same layout and
 *  conventions as g_aBinU16 above. */
static const BINU32_T g_aBinU32[] =
{
    ENTRY(add_u32),
    ENTRY(add_u32_locked),
    ENTRY(adc_u32),
    ENTRY(adc_u32_locked),
    ENTRY(sub_u32),
    ENTRY(sub_u32_locked),
    ENTRY(sbb_u32),
    ENTRY(sbb_u32_locked),
    ENTRY(or_u32),
    ENTRY(or_u32_locked),
    ENTRY(xor_u32),
    ENTRY(xor_u32_locked),
    ENTRY(and_u32),
    ENTRY(and_u32_locked),
    ENTRY(cmp_u32),
    ENTRY(test_u32),
    ENTRY_EX(bt_u32, 1),
    ENTRY_EX(btc_u32, 1),
    ENTRY_EX(btc_u32_locked, 1),
    ENTRY_EX(btr_u32, 1),
    ENTRY_EX(btr_u32_locked, 1),
    ENTRY_EX(bts_u32, 1),
    ENTRY_EX(bts_u32_locked, 1),
    ENTRY_AMD( bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
};
TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1448
1449
/*
 * 64-bit binary operations.
 */
/** Sub-test table for the 64-bit two-operand workers; same layout and
 *  conventions as g_aBinU16 above. */
static const BINU64_T g_aBinU64[] =
{
    ENTRY(add_u64),
    ENTRY(add_u64_locked),
    ENTRY(adc_u64),
    ENTRY(adc_u64_locked),
    ENTRY(sub_u64),
    ENTRY(sub_u64_locked),
    ENTRY(sbb_u64),
    ENTRY(sbb_u64_locked),
    ENTRY(or_u64),
    ENTRY(or_u64_locked),
    ENTRY(xor_u64),
    ENTRY(xor_u64_locked),
    ENTRY(and_u64),
    ENTRY(and_u64_locked),
    ENTRY(cmp_u64),
    ENTRY(test_u64),
    ENTRY_EX(bt_u64, 1),
    ENTRY_EX(btc_u64, 1),
    ENTRY_EX(btc_u64_locked, 1),
    ENTRY_EX(btr_u64, 1),
    ENTRY_EX(btr_u64_locked, 1),
    ENTRY_EX(bts_u64, 1),
    ENTRY_EX(bts_u64_locked, 1),
    ENTRY_AMD( bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
};
TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1486
1487
/*
 * XCHG
 */
/**
 * Tests the xchg memory/register workers (locked and unlocked, all widths).
 *
 * For each worker: fill a local 64-bit "memory" and "register" value with
 * distinct random data (restricted to the operand width via fMask), run the
 * worker, and verify the two values were swapped.
 */
static void XchgTest(void)
{
    if (!SubTestAndCheckIfEnabled("xchg"))
        return;
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));

    /* Worker table: operand size, width mask, and the worker via a
       function-pointer union (stored through the uintptr_t member so one
       initializer form fits all widths). */
    static struct
    {
        uint8_t cb; uint64_t fMask;
        union
        {
            uintptr_t pfn;
            FNIEMAIMPLXCHGU8 *pfnU8;
            FNIEMAIMPLXCHGU16 *pfnU16;
            FNIEMAIMPLXCHGU32 *pfnU32;
            FNIEMAIMPLXCHGU64 *pfnU64;
        } u;
    }
    s_aXchgWorkers[] =
    {
        { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
        { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
        { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
        { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
        { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
        { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
        { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
        { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
    {
        RTUINT64U uIn1, uIn2, uMem, uDst;
        uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
        uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
        if (uIn1.u == uIn2.u)
            uDst.u = uIn2.u = ~uIn2.u;  /* ensure the two values differ so a no-op swap is detectable */

        /* NOTE(review): the first call in each case operates on the global
           g_puNN/g_puNNTwo pointers and its result is not checked -
           presumably just to exercise the worker on that specially allocated
           memory; confirm before removing. */
        switch (s_aXchgWorkers[i].cb)
        {
            case 1:
                s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
                s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
                break;
            case 2:
                s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
                s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
                break;
            case 4:
                s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
                s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
                break;
            case 8:
                s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
                s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
                break;
            default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
        }

        /* After the swap the memory operand must hold the register input and vice versa. */
        if (uMem.u != uIn2.u || uDst.u != uIn1.u)
            RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
    }
}
1556
1557
1558/*
1559 * XADD
1560 */
1561static void XaddTest(void)
1562{
1563#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1564 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
1565 static struct \
1566 { \
1567 const char *pszName; \
1568 FNIEMAIMPLXADDU ## a_cBits *pfn; \
1569 BINU ## a_cBits ## _TEST_T const *paTests; \
1570 uint32_t const *pcTests; \
1571 } const s_aFuncs[] = \
1572 { \
1573 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1574 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1575 { "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1576 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1577 }; \
1578 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1579 { \
1580 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1581 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1582 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1583 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1584 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1585 { \
1586 uint32_t fEfl = paTests[iTest].fEflIn; \
1587 a_Type uSrc = paTests[iTest].uSrcIn; \
1588 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1589 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1590 if ( fEfl != paTests[iTest].fEflOut \
1591 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
1592 || uSrc != paTests[iTest].uDstIn) \
1593 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1594 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1595 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
1596 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1597 } \
1598 } \
1599 } while(0)
1600 TEST_XADD(8, uint8_t, "%#04x");
1601 TEST_XADD(16, uint16_t, "%#06x");
1602 TEST_XADD(32, uint32_t, "%#010RX32");
1603 TEST_XADD(64, uint64_t, "%#010RX64");
1604}
1605
1606
/*
 * CMPXCHG
 */

/**
 * Tests the cmpxchg workers (plain and locked) for all operand sizes,
 * reusing the 'cmp' test vectors.
 *
 * Each vector is run twice: once as-is (comparand almost certainly differs
 * from memory, so no exchange; EFLAGS must match the recorded cmp result),
 * and once with comparand == memory (exchange happens; the expected EFLAGS
 * are computed on the fly with the matching sub worker).
 *
 * NOTE(review): both failure messages use the "#%ua" label; the second
 * (positive case) was presumably meant to say "#%ub" - cosmetic only.
 */
static void CmpXchgTest(void)
{
#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
        typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
        static struct \
        { \
            const char *pszName; \
            FNIEMAIMPLCMPXCHGU ## a_cBits *pfn; \
            PFNIEMAIMPLBINU ## a_cBits pfnSub; \
            BINU ## a_cBits ## _TEST_T const *paTests; \
            uint32_t const *pcTests; \
        } const s_aFuncs[] = \
        { \
            { "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
              g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
            { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
              g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
        }; \
        for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
        { \
            if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
            BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
            uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
            if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                /* as is (99% likely to be negative). */ \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
                a_Type uA = paTests[iTest].uDstIn; \
                *g_pu ## a_cBits = paTests[iTest].uSrcIn; \
                a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
                s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
                if ( fEfl != paTests[iTest].fEflOut \
                    || *g_pu ## a_cBits != uExpect \
                    || uA != paTests[iTest].uSrcIn) \
                    RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
                                 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
                                 uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
                /* positive */ \
                uint32_t fEflExpect = paTests[iTest].fEflIn; \
                uA = paTests[iTest].uDstIn; \
                s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
                fEfl = paTests[iTest].fEflIn; \
                uA = paTests[iTest].uDstIn; \
                *g_pu ## a_cBits = uA; \
                s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
                if ( fEfl != fEflExpect \
                    || *g_pu ## a_cBits != uNew \
                    || uA != paTests[iTest].uDstIn) \
                    RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
                                 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
                                 uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
                                 EFlagsDiff(fEfl, fEflExpect)); \
            } \
        } \
    } while(0)
    TEST_CMPXCHG(8, uint8_t, "%#04RX8");
    TEST_CMPXCHG(16, uint16_t, "%#06x");
    TEST_CMPXCHG(32, uint32_t, "%#010RX32");
#if ARCH_BITS != 32 /* calling convension issue, skipping as it's an unsupported host */
    TEST_CMPXCHG(64, uint64_t, "%#010RX64");
#endif
}
1676
/**
 * Tests the cmpxchg8b workers (plain and locked).
 *
 * For each of a few random value pairs it runs a positive case (comparand
 * matches memory: memory gets the new value, ZF set) and a negative case
 * (comparand differs: memory unchanged, old value returned, ZF clear).
 */
static void CmpXchg8bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
    static struct
    {
        const char *pszName;
        FNIEMAIMPLCMPXCHG8B *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg8b", iemAImpl_cmpxchg8b },
        { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            uint64_t const uOldValue = RandU64();
            uint64_t const uNewValue = RandU64();

            /* positive test. */
            RTUINT64U uA, uB;
            uB.u = uNewValue;
            uA.u = uOldValue;
            *g_pu64 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl = fEflIn;
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            /* Expect: memory updated, comparand register unchanged, ZF set. */
            if ( fEfl != (fEflIn | X86_EFL_ZF)
                || *g_pu64 != uNewValue
                || uA.u != uOldValue)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest, fEflIn, uOldValue, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue);

            /* negative */
            uint64_t const uExpect = ~uOldValue;
            *g_pu64 = uExpect;
            uA.u = uOldValue;
            uB.u = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            /* Expect: memory untouched, current memory value returned in uA, ZF clear. */
            if ( fEfl != (fEflIn & ~X86_EFL_ZF)
                || *g_pu64 != uExpect
                || uA.u != uExpect)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue);
        }
    }
}
1733
/**
 * Tests the cmpxchg16b workers (plain, locked and, on non-ARM64 hosts, the
 * C fallback) - the 128-bit analogue of CmpXchg8bTest above.
 *
 * On AMD64 the hardware variants are skipped when the host CPU lacks the
 * CMPXCHG16B feature bit.
 */
static void CmpXchg16bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
    static struct
    {
        const char *pszName;
        FNIEMAIMPLCMPXCHG16B *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg16b", iemAImpl_cmpxchg16b },
        { "cmpxchg16b_locked", iemAImpl_cmpxchg16b_locked },
#if !defined(RT_ARCH_ARM64)
        { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
#endif
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
        if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
        {
            RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
            continue;
        }
#endif
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            RTUINT128U const uOldValue = RandU128();
            RTUINT128U const uNewValue = RandU128();

            /* positive test. */
            RTUINT128U uA, uB;
            uB = uNewValue;
            uA = uOldValue;
            *g_pu128 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl = fEflIn;
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            /* Expect: memory updated, comparand unchanged, ZF set. */
            if ( fEfl != (fEflIn | X86_EFL_ZF)
                || g_pu128->s.Lo != uNewValue.s.Lo
                || g_pu128->s.Hi != uNewValue.s.Hi
                || uA.s.Lo != uOldValue.s.Lo
                || uA.s.Hi != uOldValue.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
                             EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);

            /* negative */
            RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
            *g_pu128 = uExpect;
            uA = uOldValue;
            uB = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            /* Expect: memory untouched, current memory value returned in uA, ZF clear. */
            if ( fEfl != (fEflIn & ~X86_EFL_ZF)
                || g_pu128->s.Lo != uExpect.s.Lo
                || g_pu128->s.Hi != uExpect.s.Hi
                || uA.s.Lo != uExpect.s.Lo
                || uA.s.Hi != uExpect.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                                      " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                                      " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
                             EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
        }
    }
}
1810
1811
/*
 * Double shifts (shld/shrd).
 *
 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits ShiftDblU<a_cBits>Generate(), which runs the native worker on random
 * inputs (shift counts deliberately up to 4x the operand width to exercise
 * the count masking) and streams the vectors as C array initializers.
 */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void ShiftDblU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
            Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1846
/**
 * Emits the shld/shrd sub-test table, the generator (via GEN_SHIFT_DBL) and
 * the ShiftDblU<a_cBits>Test() driver for one operand size.
 *
 * The driver replays each vector (shift count in uMisc), compares dst and
 * EFLAGS, and on success re-runs the call through the global g_puNN/g_pfEfl
 * pointers; a second cVars iteration switches to the native worker.
 */
#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
\
static a_SubTestType const a_aSubTests[] = \
{ \
    ENTRY_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
}; \
\
GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void ShiftDblU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
                if ( uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
                                 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
1900
1901#ifdef TSTIEMAIMPL_WITH_GENERATOR
1902static void ShiftDblGenerate(PRTSTREAM pOut, uint32_t cTests)
1903{
1904 ShiftDblU16Generate(pOut, cTests);
1905 ShiftDblU32Generate(pOut, cTests);
1906 ShiftDblU64Generate(pOut, cTests);
1907}
1908#endif
1909
1910static void ShiftDblTest(void)
1911{
1912 ShiftDblU16Test();
1913 ShiftDblU32Test();
1914 ShiftDblU64Test();
1915}
1916
1917
1918/*
1919 * Unary operators.
1920 *
 * Note! We use BINUxx_TEST_T, ignoring uSrcIn and uMisc.
1922 */
1923#ifdef TSTIEMAIMPL_WITH_GENERATOR
1924# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1925void UnaryU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
1926{ \
1927 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
1928 { \
1929 GenerateArrayStart(pOut, g_aUnaryU ## a_cBits[iFn].pszName, #a_TestType); \
1930 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1931 { \
1932 a_TestType Test; \
1933 Test.fEflIn = RandEFlags(); \
1934 Test.fEflOut = Test.fEflIn; \
1935 Test.uDstIn = RandU ## a_cBits(); \
1936 Test.uDstOut = Test.uDstIn; \
1937 Test.uSrcIn = 0; \
1938 Test.uMisc = 0; \
1939 g_aUnaryU ## a_cBits[iFn].pfn(&Test.uDstOut, &Test.fEflOut); \
1940 RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, 0 }, /* #%u */\n", \
1941 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, iTest); \
1942 } \
1943 GenerateArrayEnd(pOut, g_aUnaryU ## a_cBits[iFn].pszName); \
1944 } \
1945}
1946#else
1947# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
1948#endif
1949
1950#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1951TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
1952static a_SubTestType const g_aUnaryU ## a_cBits [] = \
1953{ \
1954 ENTRY(inc_u ## a_cBits), \
1955 ENTRY(inc_u ## a_cBits ## _locked), \
1956 ENTRY(dec_u ## a_cBits), \
1957 ENTRY(dec_u ## a_cBits ## _locked), \
1958 ENTRY(not_u ## a_cBits), \
1959 ENTRY(not_u ## a_cBits ## _locked), \
1960 ENTRY(neg_u ## a_cBits), \
1961 ENTRY(neg_u ## a_cBits ## _locked), \
1962}; \
1963\
1964GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1965\
1966static void UnaryU ## a_cBits ## Test(void) \
1967{ \
1968 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
1969 { \
1970 if (!SubTestAndCheckIfEnabled(g_aUnaryU ## a_cBits[iFn].pszName)) continue; \
1971 a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
1972 uint32_t const cTests = *g_aUnaryU ## a_cBits[iFn].pcTests; \
1973 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1974 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1975 { \
1976 uint32_t fEfl = paTests[iTest].fEflIn; \
1977 a_Type uDst = paTests[iTest].uDstIn; \
1978 g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
1979 if ( uDst != paTests[iTest].uDstOut \
1980 || fEfl != paTests[iTest].fEflOut) \
1981 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
1982 iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
1983 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
1984 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1985 else \
1986 { \
1987 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1988 *g_pfEfl = paTests[iTest].fEflIn; \
1989 g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
1990 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
1991 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
1992 } \
1993 } \
1994 } \
1995}
1996TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
1997TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
1998TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
1999TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
2000
2001#ifdef TSTIEMAIMPL_WITH_GENERATOR
2002static void UnaryGenerate(PRTSTREAM pOut, uint32_t cTests)
2003{
2004 UnaryU8Generate(pOut, cTests);
2005 UnaryU16Generate(pOut, cTests);
2006 UnaryU32Generate(pOut, cTests);
2007 UnaryU64Generate(pOut, cTests);
2008}
2009#endif
2010
2011static void UnaryTest(void)
2012{
2013 UnaryU8Test();
2014 UnaryU16Test();
2015 UnaryU32Test();
2016 UnaryU64Test();
2017}
2018
2019
2020/*
2021 * Shifts.
2022 *
2023 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
2024 */
2025#ifdef TSTIEMAIMPL_WITH_GENERATOR
2026# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2027void ShiftU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2028{ \
2029 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2030 { \
2031 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2032 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2033 continue; \
2034 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2035 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2036 { \
2037 a_TestType Test; \
2038 Test.fEflIn = RandEFlags(); \
2039 Test.fEflOut = Test.fEflIn; \
2040 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
2041 Test.uDstOut = Test.uDstIn; \
2042 Test.uSrcIn = 0; \
2043 Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
2044 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
2045 RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u */\n", \
2046 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
2047 \
2048 Test.fEflIn = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
2049 Test.fEflOut = Test.fEflIn; \
2050 Test.uDstOut = Test.uDstIn; \
2051 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
2052 RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u b */\n", \
2053 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
2054 } \
2055 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2056 } \
2057}
2058#else
2059# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2060#endif
2061
2062#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2063TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
2064static a_SubTestType const a_aSubTests[] = \
2065{ \
2066 ENTRY_AMD( rol_u ## a_cBits, X86_EFL_OF), \
2067 ENTRY_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
2068 ENTRY_AMD( ror_u ## a_cBits, X86_EFL_OF), \
2069 ENTRY_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
2070 ENTRY_AMD( rcl_u ## a_cBits, X86_EFL_OF), \
2071 ENTRY_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
2072 ENTRY_AMD( rcr_u ## a_cBits, X86_EFL_OF), \
2073 ENTRY_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
2074 ENTRY_AMD( shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2075 ENTRY_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2076 ENTRY_AMD( shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2077 ENTRY_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2078 ENTRY_AMD( sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2079 ENTRY_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2080}; \
2081\
2082GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2083\
2084static void ShiftU ## a_cBits ## Test(void) \
2085{ \
2086 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2087 { \
2088 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2089 PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2090 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2091 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2092 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2093 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2094 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2095 { \
2096 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2097 { \
2098 uint32_t fEfl = paTests[iTest].fEflIn; \
2099 a_Type uDst = paTests[iTest].uDstIn; \
2100 pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
2101 if ( uDst != paTests[iTest].uDstOut \
2102 || fEfl != paTests[iTest].fEflOut ) \
2103 RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2104 iTest, iVar == 0 ? "" : "/n", \
2105 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
2106 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2107 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2108 else \
2109 { \
2110 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2111 *g_pfEfl = paTests[iTest].fEflIn; \
2112 pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
2113 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2114 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2115 } \
2116 } \
2117 pfn = a_aSubTests[iFn].pfnNative; \
2118 } \
2119 } \
2120}
2121TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
2122TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
2123TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
2124TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2125
2126#ifdef TSTIEMAIMPL_WITH_GENERATOR
2127static void ShiftGenerate(PRTSTREAM pOut, uint32_t cTests)
2128{
2129 ShiftU8Generate(pOut, cTests);
2130 ShiftU16Generate(pOut, cTests);
2131 ShiftU32Generate(pOut, cTests);
2132 ShiftU64Generate(pOut, cTests);
2133}
2134#endif
2135
2136static void ShiftTest(void)
2137{
2138 ShiftU8Test();
2139 ShiftU16Test();
2140 ShiftU32Test();
2141 ShiftU64Test();
2142}
2143
2144
2145/*
2146 * Multiplication and division.
2147 *
 * Note! The 8-bit functions have a different format, so we need to duplicate things.
2149 * Note! Currently ignoring undefined bits.
2150 */
2151
2152/* U8 */
2153TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
2154static INT_MULDIV_U8_T const g_aMulDivU8[] =
2155{
2156 ENTRY_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2157 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2158 ENTRY_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2159 ENTRY_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2160 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2161 ENTRY_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2162 ENTRY_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2163 ENTRY_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2164 ENTRY_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2165 ENTRY_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2166};
2167
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the 8-bit mul/imul/div/idiv workers.
 *
 * Only subtests whose EFLAGS flavour matches the host CPU (or is flavour
 * independent) are generated, since the reference results come from running
 * the native worker.  Note that the destination is 16 bits wide (AX for the
 * 8-bit multiply/divide forms).
 *
 * @param   pOut    Stream the test data arrays are written to.
 * @param   cTests  Number of test records to generate per subtest.
 */
static void MulDivU8Generate(PRTSTREAM pOut, uint32_t cTests)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
    {
        if (   g_aMulDivU8[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aMulDivU8[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;
        /* Fixed: dropped a stray line-continuation backslash here (copy-paste
           residue from the macro-generated variants). */
        GenerateArrayStart(pOut, g_aMulDivU8[iFn].pszName, "MULDIVU8_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++ )
        {
            MULDIVU8_TEST_T Test;
            Test.fEflIn = RandEFlags();
            Test.fEflOut = Test.fEflIn;
            Test.uDstIn = RandU16Dst(iTest);
            Test.uDstOut = Test.uDstIn;
            Test.uSrcIn = RandU8Src(iTest);
            Test.rc = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
            RTStrmPrintf(pOut, " { %#08x, %#08x, %#06RX16, %#06RX16, %#04RX8, %d }, /* #%u */\n",
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.rc, iTest);
        }
        GenerateArrayEnd(pOut, g_aMulDivU8[iFn].pszName);
    }
}
#endif
2193
2194static void MulDivU8Test(void)
2195{
2196 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2197 {
2198 if (!SubTestAndCheckIfEnabled(g_aMulDivU8[iFn].pszName)) continue; \
2199 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
2200 uint32_t const cTests = *g_aMulDivU8[iFn].pcTests;
2201 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
2202 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
2203 uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]); \
2204 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2205 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2206 {
2207 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2208 {
2209 uint32_t fEfl = paTests[iTest].fEflIn;
2210 uint16_t uDst = paTests[iTest].uDstIn;
2211 int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
2212 if ( uDst != paTests[iTest].uDstOut
2213 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
2214 || rc != paTests[iTest].rc)
2215 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
2216 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
2217 "%sexpected %#08x %#06RX16 %d%s\n",
2218 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
2219 iVar ? " " : "", fEfl, uDst, rc,
2220 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
2221 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
2222 else
2223 {
2224 *g_pu16 = paTests[iTest].uDstIn;
2225 *g_pfEfl = paTests[iTest].fEflIn;
2226 rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
2227 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
2228 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
2229 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
2230 }
2231 }
2232 pfn = g_aMulDivU8[iFn].pfnNative;
2233 }
2234 }
2235}
2236
2237#ifdef TSTIEMAIMPL_WITH_GENERATOR
2238# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2239void MulDivU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2240{ \
2241 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2242 { \
2243 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2244 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2245 continue; \
2246 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2247 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2248 { \
2249 a_TestType Test; \
2250 Test.fEflIn = RandEFlags(); \
2251 Test.fEflOut = Test.fEflIn; \
2252 Test.uDst1In = RandU ## a_cBits ## Dst(iTest); \
2253 Test.uDst1Out = Test.uDst1In; \
2254 Test.uDst2In = RandU ## a_cBits ## Dst(iTest); \
2255 Test.uDst2Out = Test.uDst2In; \
2256 Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
2257 Test.rc = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
2258 RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", %d }, /* #%u */\n", \
2259 Test.fEflIn, Test.fEflOut, Test.uDst1In, Test.uDst1Out, Test.uDst2In, Test.uDst2Out, Test.uSrcIn, \
2260 Test.rc, iTest); \
2261 } \
2262 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2263 } \
2264}
2265#else
2266# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2267#endif
2268
2269#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2270TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
2271static a_SubTestType const a_aSubTests [] = \
2272{ \
2273 ENTRY_AMD_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2274 ENTRY_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2275 ENTRY_AMD_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2276 ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2277 ENTRY_AMD_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2278 ENTRY_INTEL_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2279 ENTRY_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2280 ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2281}; \
2282\
2283GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2284\
2285static void MulDivU ## a_cBits ## Test(void) \
2286{ \
2287 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2288 { \
2289 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2290 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2291 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2292 uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
2293 PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2294 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2295 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2296 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2297 { \
2298 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2299 { \
2300 uint32_t fEfl = paTests[iTest].fEflIn; \
2301 a_Type uDst1 = paTests[iTest].uDst1In; \
2302 a_Type uDst2 = paTests[iTest].uDst2In; \
2303 int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
2304 if ( uDst1 != paTests[iTest].uDst1Out \
2305 || uDst2 != paTests[iTest].uDst2Out \
2306 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
2307 || rc != paTests[iTest].rc) \
2308 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
2309 " -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
2310 "expected %#08x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
2311 iTest, iVar == 0 ? "" : "/n", \
2312 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
2313 fEfl, uDst1, uDst2, rc, \
2314 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
2315 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
2316 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
2317 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
2318 else \
2319 { \
2320 *g_pu ## a_cBits = paTests[iTest].uDst1In; \
2321 *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
2322 *g_pfEfl = paTests[iTest].fEflIn; \
2323 rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
2324 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
2325 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
2326 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn)); \
2327 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
2328 } \
2329 } \
2330 pfn = a_aSubTests[iFn].pfnNative; \
2331 } \
2332 } \
2333}
2334TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
2335TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
2336TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2337
2338#ifdef TSTIEMAIMPL_WITH_GENERATOR
2339static void MulDivGenerate(PRTSTREAM pOut, uint32_t cTests)
2340{
2341 MulDivU8Generate(pOut, cTests);
2342 MulDivU16Generate(pOut, cTests);
2343 MulDivU32Generate(pOut, cTests);
2344 MulDivU64Generate(pOut, cTests);
2345}
2346#endif
2347
2348static void MulDivTest(void)
2349{
2350 MulDivU8Test();
2351 MulDivU16Test();
2352 MulDivU32Test();
2353 MulDivU64Test();
2354}
2355
2356
2357/*
2358 * BSWAP
2359 */
2360static void BswapTest(void)
2361{
2362 if (SubTestAndCheckIfEnabled("bswap_u16"))
2363 {
2364 *g_pu32 = UINT32_C(0x12345678);
2365 iemAImpl_bswap_u16(g_pu32);
2366#if 0
2367 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2368#else
2369 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2370#endif
2371 *g_pu32 = UINT32_C(0xffff1122);
2372 iemAImpl_bswap_u16(g_pu32);
2373#if 0
2374 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2375#else
2376 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2377#endif
2378 }
2379
2380 if (SubTestAndCheckIfEnabled("bswap_u32"))
2381 {
2382 *g_pu32 = UINT32_C(0x12345678);
2383 iemAImpl_bswap_u32(g_pu32);
2384 RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
2385 }
2386
2387 if (SubTestAndCheckIfEnabled("bswap_u64"))
2388 {
2389 *g_pu64 = UINT64_C(0x0123456789abcdef);
2390 iemAImpl_bswap_u64(g_pu64);
2391 RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
2392 }
2393}
2394
2395
2396
2397/*********************************************************************************************************************************
2398* Floating point (x87 style) *
2399*********************************************************************************************************************************/
2400
2401/*
2402 * FPU constant loading.
2403 */
2404TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);
2405
2406static const FPU_LD_CONST_T g_aFpuLdConst[] =
2407{
2408 ENTRY(fld1),
2409 ENTRY(fldl2t),
2410 ENTRY(fldl2e),
2411 ENTRY(fldpi),
2412 ENTRY(fldlg2),
2413 ENTRY(fldln2),
2414 ENTRY(fldz),
2415};
2416
2417#ifdef TSTIEMAIMPL_WITH_GENERATOR
2418static void FpuLdConstGenerate(PRTSTREAM pOut, uint32_t cTests)
2419{
2420 X86FXSTATE State;
2421 RT_ZERO(State);
2422 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2423 {
2424 GenerateArrayStart(pOut, g_aFpuLdConst[iFn].pszName, "FPU_LD_CONST_TEST_T");
2425 for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
2426 {
2427 State.FCW = RandFcw();
2428 State.FSW = RandFsw();
2429
2430 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
2431 {
2432 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2433 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
2434 g_aFpuLdConst[iFn].pfn(&State, &Res);
2435 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s }, /* #%u */\n",
2436 State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), iTest + iRounding);
2437 }
2438 }
2439 GenerateArrayEnd(pOut, g_aFpuLdConst[iFn].pszName);
2440 }
2441}
2442#endif
2443
2444static void FpuLoadConstTest(void)
2445{
2446 /*
2447 * Inputs:
2448 * - FSW: C0, C1, C2, C3
2449 * - FCW: Exception masks, Precision control, Rounding control.
2450 *
2451 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
2452 */
2453 X86FXSTATE State;
2454 RT_ZERO(State);
2455 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2456 {
2457 if (!SubTestAndCheckIfEnabled(g_aFpuLdConst[iFn].pszName))
2458 continue;
2459
2460 uint32_t const cTests = *g_aFpuLdConst[iFn].pcTests;
2461 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
2462 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
2463 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
2464 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2465 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2466 {
2467 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2468 {
2469 State.FCW = paTests[iTest].fFcw;
2470 State.FSW = paTests[iTest].fFswIn;
2471 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2472 pfn(&State, &Res);
2473 if ( Res.FSW != paTests[iTest].fFswOut
2474 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2475 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
2476 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2477 Res.FSW, FormatR80(&Res.r80Result),
2478 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2479 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2480 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2481 FormatFcw(paTests[iTest].fFcw) );
2482 }
2483 pfn = g_aFpuLdConst[iFn].pfnNative;
2484 }
2485 }
2486}
2487
2488
2489/*
2490 * Load floating point values from memory.
2491 */
2492#ifdef TSTIEMAIMPL_WITH_GENERATOR
2493# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
2494static void FpuLdR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2495{ \
2496 X86FXSTATE State; \
2497 RT_ZERO(State); \
2498 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2499 { \
2500 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2501 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2502 { \
2503 State.FCW = RandFcw(); \
2504 State.FSW = RandFsw(); \
2505 a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
2506 \
2507 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
2508 { \
2509 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2510 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
2511 a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
2512 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n", \
2513 State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), \
2514 GenFormatR ## a_cBits(&InVal), iTest, iRounding); \
2515 } \
2516 } \
2517 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2518 } \
2519}
2520#else
2521# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
2522#endif
2523
2524#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
2525typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
2526typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
2527TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
2528\
2529static const a_SubTestType a_aSubTests[] = \
2530{ \
2531 ENTRY(RT_CONCAT(fld_r80_from_r,a_cBits)) \
2532}; \
2533GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
2534\
2535static void FpuLdR ## a_cBits ## Test(void) \
2536{ \
2537 X86FXSTATE State; \
2538 RT_ZERO(State); \
2539 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2540 { \
2541 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2542 \
2543 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2544 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2545 PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2546 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2547 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2548 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2549 { \
2550 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2551 { \
2552 a_rdTypeIn const InVal = paTests[iTest].InVal; \
2553 State.FCW = paTests[iTest].fFcw; \
2554 State.FSW = paTests[iTest].fFswIn; \
2555 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2556 pfn(&State, &Res, &InVal); \
2557 if ( Res.FSW != paTests[iTest].fFswOut \
2558 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
2559 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
2560 "%s -> fsw=%#06x %s\n" \
2561 "%s expected %#06x %s%s%s (%s)\n", \
2562 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
2563 FormatR ## a_cBits(&paTests[iTest].InVal), \
2564 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
2565 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
2566 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
2567 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
2568 FormatFcw(paTests[iTest].fFcw) ); \
2569 } \
2570 pfn = a_aSubTests[iFn].pfnNative; \
2571 } \
2572 } \
2573}
2574
2575TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
2576TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
2577TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
2578
2579#ifdef TSTIEMAIMPL_WITH_GENERATOR
2580static void FpuLdMemGenerate(PRTSTREAM pOut, uint32_t cTests)
2581{
2582 FpuLdR80Generate(pOut, cTests);
2583 FpuLdR64Generate(pOut, cTests);
2584 FpuLdR32Generate(pOut, cTests);
2585}
2586#endif
2587
2588static void FpuLdMemTest(void)
2589{
2590 FpuLdR80Test();
2591 FpuLdR64Test();
2592 FpuLdR32Test();
2593}
2594
2595
2596/*
2597 * Load integer values from memory.
2598 */
2599#ifdef TSTIEMAIMPL_WITH_GENERATOR
2600# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
2601static void FpuLdI ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
2602{ \
2603 X86FXSTATE State; \
2604 RT_ZERO(State); \
2605 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2606 { \
2607 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
2608 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2609 { \
2610 State.FCW = RandFcw(); \
2611 State.FSW = RandFsw(); \
2612 a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
2613 \
2614 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
2615 { \
2616 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2617 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
2618 a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
2619 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, " a_szFmtIn " }, /* #%u/%u */\n", \
2620 State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), InVal, iTest, iRounding); \
2621 } \
2622 } \
2623 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
2624 } \
2625}
2626#else
2627# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
2628#endif
2629
/**
 * Instantiates one FPU load-integer (fild) subtest for a given source width.
 *
 * Expands to:
 *  - the worker function pointer typedefs (FNIEMAIMPLFPULDR80FROMI<N>),
 *  - the subtest descriptor type and table (single fild_r80_from_i<N> entry),
 *  - the test-vector generator via GEN_FPU_LOAD_INT (empty stub unless
 *    TSTIEMAIMPL_WITH_GENERATOR is defined),
 *  - FpuLdI<N>Test(): replays the pre-generated vectors, checking the output
 *    FSW and the converted 80-bit result against recorded expectations, then
 *    repeats with the native worker variant when one is present (cVars).
 */
#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fild_r80_from_i,a_cBits)) \
}; \
GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
\
static void FpuLdI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
\
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_iTypeIn const iInVal = paTests[iTest].iInVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &iInVal); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2679
/* Instantiate the fild subtests for 64-, 32- and 16-bit integer sources. */
TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
2683
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits the test vectors for all three fild source widths (widest first,
 *  matching the order used by FpuLdIntTest below). */
static void FpuLdIntGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuLdI64Generate(pOut, cTests);
    FpuLdI32Generate(pOut, cTests);
    FpuLdI16Generate(pOut, cTests);
}
#endif
2692
/** Runs all three fild subtests (see TEST_FPU_LOAD_INT instantiations). */
static void FpuLdIntTest(void)
{
    FpuLdI64Test();
    FpuLdI32Test();
    FpuLdI16Test();
}
2699
2700
2701/*
2702 * Load binary coded decimal values from memory.
2703 */
2704typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
2705typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
2706TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);
2707
2708static const FPU_LD_D80_T g_aFpuLdD80[] =
2709{
2710 ENTRY(fld_r80_from_d80)
2711};
2712
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits fld_r80_from_d80 test vectors: a random FCW/FSW pair plus a random
 * packed-BCD input, each exercised under all four x87 rounding modes.
 */
static void FpuLdD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdD80[iFn].pszName, "FPU_D80_IN_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++)
        {
            /* Pick the control/status word and input once; only the RC field
               varies below, so keeping the base FCW in a const is equivalent
               to the usual re-mask-in-place pattern. */
            uint16_t const fFcwBase = RandFcw();
            State.FSW               = RandFsw();
            RTPBCD80U      InVal    = RandD80Src(iTest);

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = (fFcwBase & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
                RTStrmPrintf(pOut, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), GenFormatD80(&InVal),
                             iTest, iRounding);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdD80[iFn].pszName);
    }
}
#endif
2741
2742static void FpuLdD80Test(void)
2743{
2744 X86FXSTATE State;
2745 RT_ZERO(State);
2746 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
2747 {
2748 if (!SubTestAndCheckIfEnabled(g_aFpuLdD80[iFn].pszName))
2749 continue;
2750
2751 uint32_t const cTests = *g_aFpuLdD80[iFn].pcTests;
2752 FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
2753 PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
2754 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
2755 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2756 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2757 {
2758 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2759 {
2760 RTPBCD80U const InVal = paTests[iTest].InVal;
2761 State.FCW = paTests[iTest].fFcw;
2762 State.FSW = paTests[iTest].fFswIn;
2763 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2764 pfn(&State, &Res, &InVal);
2765 if ( Res.FSW != paTests[iTest].fFswOut
2766 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2767 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
2768 "%s -> fsw=%#06x %s\n"
2769 "%s expected %#06x %s%s%s (%s)\n",
2770 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2771 FormatD80(&paTests[iTest].InVal),
2772 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
2773 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2774 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2775 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2776 FormatFcw(paTests[iTest].fFcw) );
2777 }
2778 pfn = g_aFpuLdD80[iFn].pfnNative;
2779 }
2780 }
2781}
2782
2783
2784/*
2785 * Store values floating point values to memory.
2786 */
2787#ifdef TSTIEMAIMPL_WITH_GENERATOR
2788static const RTFLOAT80U g_aFpuStR32Specials[] =
2789{
2790 RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2791 RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2792 RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
2793 RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
2794};
2795static const RTFLOAT80U g_aFpuStR64Specials[] =
2796{
2797 RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2798 RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2799 RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
2800 RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
2801 RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
2802};
2803static const RTFLOAT80U g_aFpuStR80Specials[] =
2804{
2805 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
2806};
/**
 * Generator for the FPU store subtests (TSTIEMAIMPL_WITH_GENERATOR builds):
 * runs random plus special 80-bit inputs through the store worker for every
 * rounding mode and a sweep of the OM/UM/PM exception-mask bit combinations,
 * writing each FCW/FSW-in/FSW-out/input/output tuple as a test table entry.
 */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
static void FpuStR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits) \
                                   : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
\
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_rdType OutVal; \
                    RT_ZERO(OutVal); \
                    memset(&OutVal, 0xfe, sizeof(OutVal)); /* poison so missing writes are visible */ \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
                    RTStrmPrintf(pOut, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatR ## a_cBits(&OutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
#endif
2850
/**
 * Instantiates one FPU store (fst) subtest for a given destination width.
 *
 * Expands to the worker typedefs, the subtest descriptor table
 * (fst_r80_to_r<N>), the generator via GEN_FPU_STORE, and the
 * FpuStR<N>Test() driver that replays the pre-generated vectors, comparing
 * both the resulting FSW and the stored value; the native worker variant is
 * re-run in a second variation pass when present.
 */
#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
                                                                   PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fst_r80_to_r,a_cBits)) \
}; \
GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
\
static void FpuStR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
\
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                a_rdType OutVal; \
                RT_ZERO(OutVal); \
                memset(&OutVal, 0xfe, sizeof(OutVal)); /* poison so missing writes are visible */ \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &OutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2905
/* Instantiate the fst subtests for 80-, 64- and 32-bit destinations. */
TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
2909
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits the test vectors for all three fst destination widths. */
static void FpuStMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuStR80Generate(pOut, cTests);
    FpuStR64Generate(pOut, cTests);
    FpuStR32Generate(pOut, cTests);
}
#endif
2918
/** Runs all three fst subtests (see TEST_FPU_STORE instantiations). */
static void FpuStMemTest(void)
{
    FpuStR80Test();
    FpuStR64Test();
    FpuStR32Test();
}
2925
2926
2927/*
2928 * Store integer values to memory or register.
2929 */
2930TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
2931TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
2932TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);
2933
2934static const FPU_ST_I16_T g_aFpuStI16[] =
2935{
2936 ENTRY(fist_r80_to_i16),
2937 ENTRY_AMD( fistt_r80_to_i16, 0),
2938 ENTRY_INTEL(fistt_r80_to_i16, 0),
2939};
2940static const FPU_ST_I32_T g_aFpuStI32[] =
2941{
2942 ENTRY(fist_r80_to_i32),
2943 ENTRY(fistt_r80_to_i32),
2944};
2945static const FPU_ST_I64_T g_aFpuStI64[] =
2946{
2947 ENTRY(fist_r80_to_i64),
2948 ENTRY(fistt_r80_to_i64),
2949};
2950
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Special inputs exercising the int16 overflow/rounding edges around
 *  exponents 13..32; the 16-bit variant borrows properties from the 32-bit
 *  one, thus all this stuff. */
static const RTFLOAT80U g_aFpuStI16Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
};
/** Special inputs around the int32 boundaries (exponents 30/31). */
static const RTFLOAT80U g_aFpuStI32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
};
/** Special inputs around the int64 boundaries (exponents 61..63). */
static const RTFLOAT80U g_aFpuStI64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
};
3041
/**
 * Generator for the FPU store-integer subtests (TSTIEMAIMPL_WITH_GENERATOR).
 * Workers with a vendor-specific EFL flavour go to the per-CPU output stream
 * (pOutCpu) and are skipped when they don't match the host flavour; random
 * plus special inputs are swept across rounding modes and OM/UM/PM mask
 * combinations.
 */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
static void FpuStI ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
                                                    ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
\
        GenerateArrayStart(pOutFn, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits, true) \
                                   : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
\
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_iType iOutVal = ~(a_iType)2; /* poison so missing writes are visible */ \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    pfn(&State, &uFswOut, &iOutVal, &InVal); \
                    RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatI ## a_cBits(iOutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOutFn, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
#endif
3093
/**
 * Instantiates one FPU store-integer (fist/fisttp) subtest driver.
 *
 * Unlike TEST_FPU_STORE, the typedefs and subtest tables are declared
 * separately above (the i16 table needs vendor-specific entries); this only
 * pulls in the generator and defines FpuStI<N>Test(), which replays the
 * vectors comparing FSW and the stored integer, then re-runs with the native
 * worker variant when present.
 */
#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
\
static void FpuStI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
\
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t uFswOut = 0; \
                a_iType iOutVal = ~(a_iType)2; /* poison so missing writes are visible */ \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &iOutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || iOutVal != paTests[iTest].iOutVal) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x " a_szFmt "\n" \
                                          "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, iOutVal, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3136
/* Note: fistt_r80_to_i16 results differ on AMD, of course :-) — hence the
   vendor-specific entries in g_aFpuStI16 above. */

TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
3142
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits the test vectors for all three fist destination widths; vendor
 *  flavoured workers go to pOutCpu (see GEN_FPU_STORE_INT). */
static void FpuStIntGenerate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    FpuStI64Generate(pOut, pOutCpu, cTests);
    FpuStI32Generate(pOut, pOutCpu, cTests);
    FpuStI16Generate(pOut, pOutCpu, cTests);
}
#endif
3151
/** Runs all three fist subtests (see TEST_FPU_STORE_INT instantiations). */
static void FpuStIntTest(void)
{
    FpuStI64Test();
    FpuStI32Test();
    FpuStI16Test();
}
3158
3159
3160/*
3161 * Store as packed BCD value (memory).
3162 */
3163typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
3164typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
3165TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
3166
3167static const FPU_ST_D80_T g_aFpuStD80[] =
3168{
3169 ENTRY(fst_r80_to_d80),
3170};
3171
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits fst_r80_to_d80 test vectors: random inputs (limited to 59 bits of
 * magnitude, the BCD range) plus boundary specials around the 18-digit
 * max/min, swept over all rounding modes and OM/UM/PM mask combinations.
 */
static void FpuStD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    /* Values straddling the largest/smallest representable 18-digit packed
       BCD magnitude (999'999'999'999'999'999 = 0xde0b6b3a763ffff0 * 2^-4). */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuStD80[iFn].pszName, "FPU_ST_D80_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, 59, true) : s_aSpecials[iTest - cTests];

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                /* PC doesn't influence these, so leave as is. */
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
                {
                    uint16_t  uFswOut = 0;
                    RTPBCD80U OutVal  = RTPBCD80U_INIT_ZERO(0);
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
                              | (iRounding << X86_FCW_RC_SHIFT);
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
                    g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
                    RTStrmPrintf(pOut, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n",
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal),
                                 GenFormatD80(&OutVal), iTest, iRounding, iMask);
                }
            }
        }
        GenerateArrayEnd(pOut, g_aFpuStD80[iFn].pszName);
    }
}
#endif
3225
3226
3227static void FpuStD80Test(void)
3228{
3229 X86FXSTATE State;
3230 RT_ZERO(State);
3231 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3232 {
3233 if (!SubTestAndCheckIfEnabled(g_aFpuStD80[iFn].pszName))
3234 continue;
3235
3236 uint32_t const cTests = *g_aFpuStD80[iFn].pcTests;
3237 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
3238 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
3239 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
3240 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3241 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3242 {
3243 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3244 {
3245 RTFLOAT80U const InVal = paTests[iTest].InVal;
3246 uint16_t uFswOut = 0;
3247 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3248 State.FCW = paTests[iTest].fFcw;
3249 State.FSW = paTests[iTest].fFswIn;
3250 pfn(&State, &uFswOut, &OutVal, &InVal);
3251 if ( uFswOut != paTests[iTest].fFswOut
3252 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
3253 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3254 "%s -> fsw=%#06x %s\n"
3255 "%s expected %#06x %s%s%s (%s)\n",
3256 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3257 FormatR80(&paTests[iTest].InVal),
3258 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
3259 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
3260 FswDiff(uFswOut, paTests[iTest].fFswOut),
3261 RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
3262 FormatFcw(paTests[iTest].fFcw) );
3263 }
3264 pfn = g_aFpuStD80[iFn].pfnNative;
3265 }
3266 }
3267}
3268
3269
3270
3271/*********************************************************************************************************************************
3272* x87 FPU Binary Operations *
3273*********************************************************************************************************************************/
3274
3275/*
3276 * Binary FPU operations on two 80-bit floating point values.
3277 */
3278TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
3279enum { kFpuBinaryHint_fprem = 1, };
3280
3281static const FPU_BINARY_R80_T g_aFpuBinaryR80[] =
3282{
3283 ENTRY(fadd_r80_by_r80),
3284 ENTRY(fsub_r80_by_r80),
3285 ENTRY(fsubr_r80_by_r80),
3286 ENTRY(fmul_r80_by_r80),
3287 ENTRY(fdiv_r80_by_r80),
3288 ENTRY(fdivr_r80_by_r80),
3289 ENTRY_EX(fprem_r80_by_r80, kFpuBinaryHint_fprem),
3290 ENTRY_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
3291 ENTRY(fscale_r80_by_r80),
3292 ENTRY_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3293 ENTRY_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3294 ENTRY_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3295 ENTRY_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3296 ENTRY_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3297 ENTRY_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3298};
3299
3300#ifdef TSTIEMAIMPL_WITH_GENERATOR
3301static void FpuBinaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3302{
3303 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
3304
3305 static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
3306 {
3307 { RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
3308 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3309 { RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
3310 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3311 { RTFLOAT80U_INIT_ZERO(0), /* minus variant */
3312 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3313 { RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
3314 RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
3315 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
3316 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
3317 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3318 RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3319 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3320 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3321 /* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
3322 once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
3323 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
3324 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3325 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
3326 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
3327 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
3328 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
3329 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
3330 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
3331 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
3332 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
3333 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3334 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3335 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3336 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3337 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
3338 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
3339 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
3340 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
3341 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
3342 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
3343 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
3344 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
3345 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3346 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
3347 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: max * 2^-24577 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3348 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
3349 /* fscale: Negative variants for the essentials of the above. */
3350 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3351 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3352 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3353 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3354 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3355 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57342 - within 10980XE range */
3356 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: max * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3357 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57343 - outside 10980XE range, behaviour changes! */
3358 /* fscale: Some fun with denormals and pseudo-denormals. */
3359 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^-4 */
3360 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3361 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^+1 */
3362 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3363 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: max * 2^+0 */
3364 { RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: max * 2^-4 => underflow */
3365 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3366 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3367 { RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3368 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^-4 */
3369 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3370 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+0 */
3371 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3372 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+1 */
3373 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
3374 };
3375
3376 X86FXSTATE State;
3377 RT_ZERO(State);
3378 uint32_t cMinNormalPairs = (cTests - 144) / 4;
3379 uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
3380 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3381 {
3382 PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
3383 PRTSTREAM pOutFn = pOut;
3384 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3385 {
3386 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3387 continue;
3388 pOutFn = pOutCpu;
3389 }
3390
3391 GenerateArrayStart(pOutFn, g_aFpuBinaryR80[iFn].pszName, "FPU_BINARY_R80_TEST_T");
3392 uint32_t iTestOutput = 0;
3393 uint32_t cNormalInputPairs = 0;
3394 uint32_t cTargetRangeInputs = 0;
3395 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3396 {
3397 RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
3398 RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
3399 bool fTargetRange = false;
3400 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
3401 {
3402 cNormalInputPairs++;
3403 if ( g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
3404 && (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
3405 cTargetRangeInputs += fTargetRange = true;
3406 else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
3407 if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3408 { /* The aim is two values with an exponent difference between 64 and 640 so we can do the whole sequence. */
3409 InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
3410 InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
3411 cTargetRangeInputs += fTargetRange = true;
3412 }
3413 }
3414 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
3415 {
3416 iTest -= 1;
3417 continue;
3418 }
3419
3420 uint16_t const fFcwExtra = 0;
3421 uint16_t const fFcw = RandFcw();
3422 State.FSW = RandFsw();
3423
3424 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3425 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
3426 {
3427 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
3428 | (iRounding << X86_FCW_RC_SHIFT)
3429 | (iPrecision << X86_FCW_PC_SHIFT)
3430 | X86_FCW_MASK_ALL;
3431 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3432 pfn(&State, &ResM, &InVal1, &InVal2);
3433 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
3434 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3435 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3436
3437 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
3438 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3439 pfn(&State, &ResU, &InVal1, &InVal2);
3440 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
3441 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3442 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3443
3444 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
3445 if (fXcpt)
3446 {
3447 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3448 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3449 pfn(&State, &Res1, &InVal1, &InVal2);
3450 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
3451 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3452 GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3453 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
3454 {
3455 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
3456 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3457 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3458 pfn(&State, &Res2, &InVal1, &InVal2);
3459 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
3460 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3461 GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3462 }
3463 if (!RT_IS_POWER_OF_TWO(fXcpt))
3464 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
3465 if (fUnmasked & fXcpt)
3466 {
3467 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
3468 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3469 pfn(&State, &Res3, &InVal1, &InVal2);
3470 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
3471 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3472 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
3473 }
3474 }
3475
3476 /* If the values are in range and caused no exceptions, do the whole series of
3477 partial reminders till we get the non-partial one or run into an exception. */
3478 if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3479 {
3480 IEMFPURESULT ResPrev = ResM;
3481 for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 | X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
3482 {
3483 State.FCW = State.FCW | X86_FCW_MASK_ALL;
3484 State.FSW = ResPrev.FSW;
3485 IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3486 pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
3487 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/seq%u = #%u */\n",
3488 State.FCW | fFcwExtra, State.FSW, ResSeq.FSW, GenFormatR80(&ResPrev.r80Result),
3489 GenFormatR80(&InVal2), GenFormatR80(&ResSeq.r80Result),
3490 iTest, iRounding, iPrecision, i + 1, iTestOutput++);
3491 ResPrev = ResSeq;
3492 }
3493 }
3494 }
3495 }
3496 GenerateArrayEnd(pOutFn, g_aFpuBinaryR80[iFn].pszName);
3497 }
3498}
3499#endif
3500
3501
3502static void FpuBinaryR80Test(void)
3503{
3504 X86FXSTATE State;
3505 RT_ZERO(State);
3506 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3507 {
3508 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryR80[iFn].pszName))
3509 continue;
3510
3511 uint32_t const cTests = *g_aFpuBinaryR80[iFn].pcTests;
3512 FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
3513 PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
3514 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
3515 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3516 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3517 {
3518 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3519 {
3520 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3521 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3522 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3523 State.FCW = paTests[iTest].fFcw;
3524 State.FSW = paTests[iTest].fFswIn;
3525 pfn(&State, &Res, &InVal1, &InVal2);
3526 if ( Res.FSW != paTests[iTest].fFswOut
3527 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
3528 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3529 "%s -> fsw=%#06x %s\n"
3530 "%s expected %#06x %s%s%s (%s)\n",
3531 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3532 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3533 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3534 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
3535 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3536 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
3537 FormatFcw(paTests[iTest].fFcw) );
3538 }
3539 pfn = g_aFpuBinaryR80[iFn].pfnNative;
3540 }
3541 }
3542}
3543
3544
3545/*
3546 * Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit one.
3547 */
3548#define int64_t_IS_NORMAL(a) 1
3549#define int32_t_IS_NORMAL(a) 1
3550#define int16_t_IS_NORMAL(a) 1
3551
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked input pairs the generator appends after the cTests random
   entries (one table per second-operand type). */
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};

/* Emits the FpuBinary<UpBits>Generate() function that produces test data for
   the r80-by-<type> binary operations.  For each input pair every rounding
   mode, precision and exception-mask (all-masked / all-unmasked) combination
   is recorded.  A quarter of the random inputs are forced to be normal pairs
   by redoing the tail iterations until the quota is met. */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinary ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                /* Not enough normal pairs yet - redo this iteration with fresh random input. */ \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
                { \
                    for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
                    { \
                        State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
                                  | (iRounding << X86_FCW_RC_SHIFT) \
                                  | (iPrecision << X86_FCW_PC_SHIFT) \
                                  | iMask; \
                        IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                        a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
                        RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n", \
                                     State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                                     GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u'); \
                    } \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
#endif
3626
/* Emits, for one second-operand type: the sub-test table (the six
   f[i]add/mul/sub/subr/div/divr r80-by-<type> workers), the generator (see
   GEN_FPU_BINARY_SMALL) and the FpuBinary<UpBits>Test() function which
   replays the recorded test data, checking output FSW and 80-bit result. */
#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
}; \
\
GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinary ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        /* Second variation (when present) runs the native worker on the same data. */ \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &Res, &InVal1, &InVal2); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
3687
3688
3689/*
3690 * Binary operations on 80-, 64- and 32-bit floating point only affecting FSW.
3691 */
3692#ifdef TSTIEMAIMPL_WITH_GENERATOR
3693static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
3694{
3695 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3696 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
3697};
3698static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
3699{
3700 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3701 RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
3702};
3703static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
3704{
3705 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3706 RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
3707};
3708static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
3709{
3710 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
3711};
3712static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
3713{
3714 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
3715};
3716
3717# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
3718static void FpuBinaryFsw ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
3719{ \
3720 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
3721 \
3722 X86FXSTATE State; \
3723 RT_ZERO(State); \
3724 uint32_t cMinNormalPairs = (cTests - 144) / 4; \
3725 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3726 { \
3727 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
3728 uint32_t cNormalInputPairs = 0; \
3729 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
3730 { \
3731 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
3732 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
3733 a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
3734 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
3735 if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
3736 cNormalInputPairs++; \
3737 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
3738 { \
3739 iTest -= 1; \
3740 continue; \
3741 } \
3742 \
3743 uint16_t const fFcw = RandFcw(); \
3744 State.FSW = RandFsw(); \
3745 \
3746 /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
3747 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
3748 { \
3749 State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
3750 uint16_t fFswOut = 0; \
3751 a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
3752 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%c */\n", \
3753 State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
3754 iTest, iMask ? 'c' : 'u'); \
3755 } \
3756 } \
3757 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
3758 } \
3759}
3760#else
3761# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
3762#endif
3763
/* Emits, for one second-operand type: the sub-test table (entries given as
   variadic arguments), the generator (see GEN_FPU_BINARY_FSW) and the
   FpuBinaryFsw<UpBits>Test() function replaying the recorded data; only the
   output FSW is checked as these operations produce no value result. */
#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    __VA_ARGS__ \
}; \
\
GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinaryFsw ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        /* Second variation (when present) runs the native worker on the same data. */ \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                uint16_t fFswOut = 0; \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &fFswOut, &InVal1, &InVal2); \
                if (fFswOut != paTests[iTest].fFswOut) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x\n" \
                                          "%s expected %#06x %s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", fFswOut, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, \
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY(fcom_r80_by_r80), ENTRY(fucom_r80_by_r80))
TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY(fcom_r80_by_r64))
TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY(fcom_r80_by_r32))
TEST_FPU_BINARY_FSW(1, 32, I32, int32_t, FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY(ficom_r80_by_i32))
TEST_FPU_BINARY_FSW(1, 16, I16, int16_t, FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY(ficom_r80_by_i16))
3817
3818
3819/*
3820 * Binary operations on 80-bit floating point that effects only EFLAGS and possibly FSW.
3821 */
3822TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);
3823
3824static const FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
3825{
3826 ENTRY(fcomi_r80_by_r80),
3827 ENTRY(fucomi_r80_by_r80),
3828};
3829
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked input pairs appended after the random entries. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};

/**
 * Generates test data for the EFLAGS returning r80-by-r80 comparisons in
 * g_aFpuBinaryEflR80 (fcomi/fucomi), recording input FCW/FSW, both operands,
 * the output FSW and the returned EFLAGS for each input pair with the
 * exception mask both cleared ('u') and fully set ('c').
 */
static void FpuBinaryEflR80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4; /* quota of normal input pairs among the random entries */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuBinaryEflR80[iFn].pszName, "FPU_BINARY_EFL_R80_TEST_T");
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
        {
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
            RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Not enough normal pairs yet - redo this iteration with fresh random input. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
            {
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
                uint16_t uFswOut = 0;
                uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %#08x }, /* #%u/%c */\n",
                             State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal1), GenFormatR80(&InVal2), fEflOut,
                             iTest, iMask ? 'c' : 'u');
            }
        }
        GenerateArrayEnd(pOut, g_aFpuBinaryEflR80[iFn].pszName);
    }
}
#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
3878
3879static void FpuBinaryEflR80Test(void)
3880{
3881 X86FXSTATE State;
3882 RT_ZERO(State);
3883 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3884 {
3885 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryEflR80[iFn].pszName))
3886 continue;
3887
3888 uint32_t const cTests = *g_aFpuBinaryEflR80[iFn].pcTests;
3889 FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
3890 PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
3891 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
3892 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3893 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3894 {
3895 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3896 {
3897 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3898 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3899 State.FCW = paTests[iTest].fFcw;
3900 State.FSW = paTests[iTest].fFswIn;
3901 uint16_t uFswOut = 0;
3902 uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
3903 if ( uFswOut != paTests[iTest].fFswOut
3904 || fEflOut != paTests[iTest].fEflOut)
3905 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3906 "%s -> fsw=%#06x efl=%#08x\n"
3907 "%s expected %#06x %#08x %s%s (%s)\n",
3908 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3909 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3910 iVar ? " " : "", uFswOut, fEflOut,
3911 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
3912 FswDiff(uFswOut, paTests[iTest].fFswOut), EFlagsDiff(fEflOut, paTests[iTest].fEflOut),
3913 FormatFcw(paTests[iTest].fFcw));
3914 }
3915 pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
3916 }
3917 }
3918}
3919
3920
3921/*********************************************************************************************************************************
3922* x87 FPU Unary Operations *
3923*********************************************************************************************************************************/
3924
3925/*
3926 * Unary FPU operations on one 80-bit floating point value.
3927 *
3928 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
3929 * a rounding error or not.
3930 */
3931TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);
3932
3933enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };
3934static const FPU_UNARY_R80_T g_aFpuUnaryR80[] =
3935{
3936 ENTRY_EX( fabs_r80, kUnary_Accurate),
3937 ENTRY_EX( fchs_r80, kUnary_Accurate),
3938 ENTRY_AMD_EX( f2xm1_r80, 0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
3939 ENTRY_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
3940 ENTRY_EX( fsqrt_r80, kUnary_Accurate),
3941 ENTRY_EX( frndint_r80, kUnary_Accurate),
3942 ENTRY_AMD_EX( fsin_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
3943 ENTRY_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
3944 ENTRY_AMD_EX( fcos_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences
3945 ENTRY_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
3946};
3947
3948#ifdef TSTIEMAIMPL_WITH_GENERATOR
3949
3950static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
3951{
3952 if ( enmKind == kUnary_Rounding_F2xm1
3953 && RTFLOAT80U_IS_NORMAL(pr80Val)
3954 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
3955 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
3956 return true;
3957 return false;
3958}
3959
/**
 * Generates test data for the unary FPU operations in g_aFpuUnaryR80.
 *
 * Each input is exercised with all 4 rounding modes x all 4 precision modes,
 * first with all exceptions masked, then all unmasked, and finally with just
 * the exceptions that actually occurred masked (plus variations).
 *
 * @param   pOut    Output stream for workers with common behaviour.
 * @param   pOutCpu Output stream for workers with CPU-vendor specific behaviour.
 * @param   cTests  Number of random inputs per worker (specials come on top).
 */
static void FpuUnaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    /* Hand-picked inputs appended after the random ones. */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
    };
    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4; /* Require at least 1/4 of the inputs to be normal values. */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        /* Prefer the native worker when producing reference data. */
        PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* Vendor specific worker: only generate data when the host CPU matches. */
            if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }

        GenerateArrayStart(pOutFn, g_aFpuUnaryR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
        uint32_t iTestOutput = 0;
        uint32_t cNormalInputs = 0;
        uint32_t cTargetRangeInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
            {
                if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
                {
                    /* Steer a share of the normal inputs into the range where the operation
                       produces interesting (non-saturated) results.
                       NOTE(review): given the enclosing condition the second operand of both
                       ternaries below is dead code; possibly the outer check was meant to
                       include the trigonometry workers as well - verify intent. */
                    unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
                                        ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
                    unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
                    if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
                        cTargetRangeInputs++;
                    else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
                    {
                        /* Force the exponent into the target range towards the end of the run. */
                        InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
                        cTargetRangeInputs++;
                    }
                }
                cNormalInputs++;
            }
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Not enough normal inputs yet; redo this iteration with a new random value. */
                iTest -= 1;
                continue;
            }

            /* Bit 7 (MBZ in FCW) flags the test record as "rounding error acceptable". */
            uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* 1st call: all exceptions masked. */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                              | (iRounding << X86_FCW_RC_SHIFT)
                              | (iPrecision << X86_FCW_PC_SHIFT)
                              | X86_FCW_MASK_ALL;
                    IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResM, &InVal);
                    RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal),
                                 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* 2nd call: all exceptions unmasked. */
                    State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
                    IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResU, &InVal);
                    RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal),
                                 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* Follow-up calls: mask exactly the exceptions that were raised. */
                    uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
                    if (fXcpt)
                    {
                        State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                        IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res1, &InVal);
                        RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
                                     State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal),
                                     GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
                        {
                            /* Partial masking raised additional exceptions; retry with those included too. */
                            fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
                            State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                            IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &Res2, &InVal);
                            RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
                                         State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal),
                                         GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        }
                        /* When several exceptions occurred, also test each one unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
                                    IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                                    pfn(&State, &Res3, &InVal);
                                    RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
                                                 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal),
                                                 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
                                }
                    }
                }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryR80[iFn].pszName);
    }
}
4079#endif
4080
4081static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
4082{
4083 if (fFcw1 == fFcw2)
4084 return true;
4085 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
4086 {
4087 *pfRndErr = true;
4088 return true;
4089 }
4090 return false;
4091}
4092
4093static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
4094{
4095 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
4096 return true;
4097 if ( fRndErrOk
4098 && pr80Val1->s.fSign == pr80Val2->s.fSign)
4099 {
4100 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
4101 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
4102 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
4103 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
4104 ||
4105 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
4106 && pr80Val1->s.uMantissa == UINT64_MAX
4107 && pr80Val2->s.uMantissa == RT_BIT_64(63))
4108 ||
4109 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
4110 && pr80Val2->s.uMantissa == UINT64_MAX
4111 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
4112 {
4113 *pfRndErr = true;
4114 return true;
4115 }
4116 }
4117 return false;
4118}
4119
4120
/**
 * Runs the pre-generated tests for the unary FPU operations in g_aFpuUnaryR80,
 * comparing each worker's FSW and 80-bit result against the recorded values.
 * One-ulp/C1 differences are tolerated for records flagged with bit 7 in the
 * stored FCW (see FpuUnaryR80Generate) and are counted and reported.
 */
static void FpuUnaryR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryR80[iFn].pcTests;
        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]); /* 2 when a native variant exists, else 1. */
        uint32_t cRndErrs = 0;
        uint32_t cPossibleRndErrs = 0;
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                /* Bit 7 of the recorded FCW is a generator-side flag meaning "rounding
                   error acceptable"; it must be stripped before loading the FCW. */
                bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80);
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                bool fRndErr = false;
                if ( !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
                    || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s\n"
                                          "%s expected %#06x %s%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? "  " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? "  " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
                                 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
                cRndErrs += fRndErr;
                cPossibleRndErrs += fRndErrOk;
            }
            pfn = g_aFpuUnaryR80[iFn].pfnNative; /* 2nd pass (if any) checks the native worker. */
        }
        if (cPossibleRndErrs > 0)
            RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
    }
}
4169
4170
4171/*
4172 * Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
4173 */
4174TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);
4175
4176static const FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
4177{
4178 ENTRY(ftst_r80),
4179 ENTRY_EX(fxam_r80, 1),
4180};
4181
4182#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the FSW-only unary FPU operations (ftst, fxam).
 *
 * For non-fxam workers each input is exercised with all rounding/precision
 * combinations, both with all exceptions masked and all unmasked.  For fxam
 * the FCW is used as-is and the register is randomly marked empty via FTW
 * (recorded using the MBZ bit 7 of the stored FCW).
 *
 * @param   pOut    Output stream for workers with common behaviour.
 * @param   pOutCpu Output stream for workers with CPU-vendor specific behaviour.
 * @param   cTests  Number of random inputs per worker (specials come on top).
 */
static void FpuUnaryFswR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    /* Hand-picked inputs appended after the random ones. */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4; /* Require at least 1/4 of the inputs to be normal values. */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1; /* see table: fxam needs FTW handling */
        PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* Vendor specific worker: only generate data when the host CPU matches. */
            if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }
        State.FTW = 0;

        GenerateArrayStart(pOutFn, g_aFpuUnaryFswR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
        uint32_t cNormalInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
                cNormalInputs++;
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                /* Not enough normal inputs yet; redo this iteration with a new random value. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            if (!fIsFxam)
            {
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                {
                    for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                    {
                        /* One pass with all exceptions unmasked (iMask=0), one with all masked. */
                        for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
                        {
                            State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                                      | (iRounding << X86_FCW_RC_SHIFT)
                                      | (iPrecision << X86_FCW_PC_SHIFT)
                                      | iMask;
                            uint16_t fFswOut = 0;
                            pfn(&State, &fFswOut, &InVal);
                            RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s }, /* #%u/%u/%u/%c */\n",
                                         State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal),
                                         iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
                        }
                    }
                }
            }
            else
            {
                /* fxam: exercise with the register randomly marked empty (1 in 4). */
                uint16_t fFswOut = 0;
                uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
                State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
                State.FCW = fFcw;
                pfn(&State, &fFswOut, &InVal);
                RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s }, /* #%u%s */\n",
                             fFcw | fEmpty, State.FSW, fFswOut, GenFormatR80(&InVal), iTest, fEmpty ? "/empty" : "");
            }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryFswR80[iFn].pszName);
    }
}
4256#endif
4257
4258
/**
 * Runs the pre-generated tests for the FSW-only unary FPU operations,
 * comparing each worker's output FSW against the recorded value.
 * Bit 7 of the stored FCW indicates an empty register tag (fxam records).
 */
static void FpuUnaryFswR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryFswR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryFswR80[iFn].pcTests;
        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]); /* 2 when a native variant exists, else 1. */
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                uint16_t fFswOut = 0;
                State.FSW = paTests[iTest].fFswIn;
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
                /* Bit 7 set means the generator ran with an empty register tag. */
                State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
                pfn(&State, &fFswOut, &InVal);
                if (fFswOut != paTests[iTest].fFswOut)
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x\n"
                                          "%s expected %#06x %s (%s%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? "  " : "", fFswOut,
                                 iVar ? "  " : "", paTests[iTest].fFswOut,
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
                                 paTests[iTest].fFcw & 0x80 ? " empty" : "");
            }
            pfn = g_aFpuUnaryFswR80[iFn].pfnNative; /* 2nd pass (if any) checks the native worker. */
        }
    }
}
4298
4299/*
4300 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
4301 */
4302TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);
4303
4304static const FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
4305{
4306 ENTRY(fxtract_r80_r80),
4307 ENTRY_AMD( fptan_r80_r80, 0), // rounding differences
4308 ENTRY_INTEL(fptan_r80_r80, 0),
4309 ENTRY_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
4310 ENTRY_INTEL(fsincos_r80_r80, 0),
4311};
4312
4313#ifdef TSTIEMAIMPL_WITH_GENERATOR
4314static void FpuUnaryTwoR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4315{
4316 static RTFLOAT80U const s_aSpecials[] =
4317 {
4318 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4319 };
4320
4321 X86FXSTATE State;
4322 RT_ZERO(State);
4323 uint32_t cMinNormals = cTests / 4;
4324 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4325 {
4326 PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
4327 PRTSTREAM pOutFn = pOut;
4328 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4329 {
4330 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4331 continue;
4332 pOutFn = pOutCpu;
4333 }
4334
4335 GenerateArrayStart(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName, "FPU_UNARY_TWO_R80_TEST_T");
4336 uint32_t iTestOutput = 0;
4337 uint32_t cNormalInputs = 0;
4338 uint32_t cTargetRangeInputs = 0;
4339 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4340 {
4341 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4342 if (RTFLOAT80U_IS_NORMAL(&InVal))
4343 {
4344 if (iFn != 0)
4345 {
4346 unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4347 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4348 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4349 cTargetRangeInputs++;
4350 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4351 {
4352 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4353 cTargetRangeInputs++;
4354 }
4355 }
4356 cNormalInputs++;
4357 }
4358 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4359 {
4360 iTest -= 1;
4361 continue;
4362 }
4363
4364 uint16_t const fFcwExtra = 0; /* for rounding error indication */
4365 uint16_t const fFcw = RandFcw();
4366 State.FSW = RandFsw();
4367
4368 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4369 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4370 {
4371 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4372 | (iRounding << X86_FCW_RC_SHIFT)
4373 | (iPrecision << X86_FCW_PC_SHIFT)
4374 | X86_FCW_MASK_ALL;
4375 IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4376 pfn(&State, &ResM, &InVal);
4377 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4378 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal), GenFormatR80(&ResM.r80Result1),
4379 GenFormatR80(&ResM.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4380
4381 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4382 IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4383 pfn(&State, &ResU, &InVal);
4384 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4385 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal), GenFormatR80(&ResU.r80Result1),
4386 GenFormatR80(&ResU.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4387
4388 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4389 if (fXcpt)
4390 {
4391 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4392 IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4393 pfn(&State, &Res1, &InVal);
4394 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4395 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal), GenFormatR80(&Res1.r80Result1),
4396 GenFormatR80(&Res1.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4397 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4398 {
4399 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4400 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4401 IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4402 pfn(&State, &Res2, &InVal);
4403 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4404 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal), GenFormatR80(&Res2.r80Result1),
4405 GenFormatR80(&Res2.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4406 }
4407 if (!RT_IS_POWER_OF_TWO(fXcpt))
4408 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4409 if (fUnmasked & fXcpt)
4410 {
4411 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4412 IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4413 pfn(&State, &Res3, &InVal);
4414 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4415 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal), GenFormatR80(&Res3.r80Result1),
4416 GenFormatR80(&Res3.r80Result2), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4417 }
4418 }
4419 }
4420 }
4421 GenerateArrayEnd(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName);
4422 }
4423}
4424#endif
4425
4426
/**
 * Runs the pre-generated tests for the two-result unary FPU operations,
 * comparing the worker's FSW and both 80-bit results against the recorded
 * values (exact match required, no rounding slack).
 */
static void FpuUnaryTwoR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryTwoR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryTwoR80[iFn].pcTests;
        FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]); /* 2 when a native variant exists, else 1. */
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                if ( Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s %s\n"
                                          "%s expected %#06x %s %s %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? "  " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
                                 iVar ? "  " : "", paTests[iTest].fFswOut,
                                 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
            }
            pfn = g_aFpuUnaryTwoR80[iFn].pfnNative; /* 2nd pass (if any) checks the native worker. */
        }
    }
}
4469
4470
4471/*********************************************************************************************************************************
4472* SSE floating point Binary Operations *
4473*********************************************************************************************************************************/
4474
4475/*
4476 * Binary SSE operations on packed single precision floating point values.
4477 */
4478TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
4479
4480static const SSE_BINARY_R32_T g_aSseBinaryR32[] =
4481{
4482 ENTRY_BIN(addps_u128),
4483 ENTRY_BIN(mulps_u128),
4484 ENTRY_BIN(subps_u128),
4485 ENTRY_BIN(minps_u128),
4486 ENTRY_BIN(divps_u128),
4487 ENTRY_BIN(maxps_u128),
4488 ENTRY_BIN(haddps_u128),
4489 ENTRY_BIN(hsubps_u128),
4490 ENTRY_BIN(sqrtps_u128),
4491 ENTRY_BIN(addsubps_u128),
4492};
4493
4494#ifdef TSTIEMAIMPL_WITH_GENERATOR
4495static RTEXITCODE SseBinaryR32Generate(const char *pszDataFileFmt, uint32_t cTests)
4496{
4497 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
4498
4499 static struct { RTFLOAT32U aVal1[4], aVal2[4]; } const s_aSpecials[] =
4500 {
4501 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), },
4502 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) } },
4503 /** @todo More specials. */
4504 };
4505
4506 X86FXSTATE State;
4507 RT_ZERO(State);
4508 uint32_t cMinNormalPairs = (cTests - 144) / 4;
4509 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
4510 {
4511 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR32[iFn].pfnNative ? g_aSseBinaryR32[iFn].pfnNative : g_aSseBinaryR32[iFn].pfn;
4512
4513 PRTSTREAM pStrmOut = NULL;
4514 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR32[iFn].pszName);
4515 if (RT_FAILURE(rc))
4516 {
4517 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR32[iFn].pszName, rc);
4518 return RTEXITCODE_FAILURE;
4519 }
4520
4521 uint32_t cNormalInputPairs = 0;
4522 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4523 {
4524 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
4525
4526 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
4527 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
4528 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
4529 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
4530
4531 TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
4532 TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
4533 TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[2];
4534 TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[3];
4535
4536 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
4537 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
4538 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
4539 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
4540 cNormalInputPairs++;
4541 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4542 {
4543 iTest -= 1;
4544 continue;
4545 }
4546
4547 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
4548 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4549 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
4550 for (uint8_t iFz = 0; iFz < 2; iFz++)
4551 {
4552 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
4553 | (iRounding << X86_MXCSR_RC_SHIFT)
4554 | (iDaz ? X86_MXCSR_DAZ : 0)
4555 | (iFz ? X86_MXCSR_FZ : 0)
4556 | X86_MXCSR_XCPT_MASK;
4557 IEMSSERESULT ResM; RT_ZERO(ResM);
4558 pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
4559 TestData.fMxcsrIn = State.MXCSR;
4560 TestData.fMxcsrOut = ResM.MXCSR;
4561 TestData.OutVal = ResM.uResult;
4562 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4563
4564 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
4565 IEMSSERESULT ResU; RT_ZERO(ResU);
4566 pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
4567 TestData.fMxcsrIn = State.MXCSR;
4568 TestData.fMxcsrOut = ResU.MXCSR;
4569 TestData.OutVal = ResU.uResult;
4570 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4571
4572 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
4573 if (fXcpt)
4574 {
4575 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
4576 IEMSSERESULT Res1; RT_ZERO(Res1);
4577 pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
4578 TestData.fMxcsrIn = State.MXCSR;
4579 TestData.fMxcsrOut = Res1.MXCSR;
4580 TestData.OutVal = Res1.uResult;
4581 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4582
4583 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
4584 {
4585 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
4586 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
4587 IEMSSERESULT Res2; RT_ZERO(Res2);
4588 pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
4589 TestData.fMxcsrIn = State.MXCSR;
4590 TestData.fMxcsrOut = Res2.MXCSR;
4591 TestData.OutVal = Res2.uResult;
4592 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4593 }
4594 if (!RT_IS_POWER_OF_TWO(fXcpt))
4595 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
4596 if (fUnmasked & fXcpt)
4597 {
4598 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
4599 IEMSSERESULT Res3; RT_ZERO(Res3);
4600 pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
4601 TestData.fMxcsrIn = State.MXCSR;
4602 TestData.fMxcsrOut = Res3.MXCSR;
4603 TestData.OutVal = Res3.uResult;
4604 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4605 }
4606 }
4607 }
4608 }
4609 rc = RTStrmClose(pStrmOut);
4610 if (RT_FAILURE(rc))
4611 {
4612 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR32[iFn].pszName, rc);
4613 return RTEXITCODE_FAILURE;
4614 }
4615 }
4616
4617 return RTEXITCODE_SUCCESS;
4618}
4619#endif
4620
/**
 * Runs the pre-generated tests for the packed single precision SSE workers,
 * comparing the output MXCSR and all four R32 lanes against the recorded
 * values (exact match required).
 */
static void SseBinaryR32Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32[iFn].pszName))
            continue;

        /* NOTE(review): for the binary-data subtests *pcTests appears to hold a byte
           count - hence the division by sizeof(SSE_BINARY_TEST_T) below - confirm. */
        uint32_t const cTests = *g_aSseBinaryR32[iFn].pcTests;
        SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR32[iFn].paTests;
        PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR32[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32[iFn]); /* 2 when a native variant exists, else 1. */
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
            {
                IEMSSERESULT Res; RT_ZERO(Res);

                State.MXCSR = paTests[iTest].fMxcsrIn;
                pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
                bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
                                   && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
                                   && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
                                   && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
                if ( Res.MXCSR != paTests[iTest].fMxcsrOut
                    || !fValsIdentical)
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s\n"
                                          "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
                                          "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
                                 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
                                 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
                                 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
                                 iVar ? "  " : "", Res.MXCSR,
                                 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
                                 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
                                 iVar ? "  " : "", paTests[iTest].fMxcsrOut,
                                 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
                                 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
                                 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
                                 !fValsIdentical ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn) );
            }
            pfn = g_aSseBinaryR32[iFn].pfnNative; /* 2nd pass (if any) checks the native worker. */
        }
    }
}
4671
4672
4673/*
4674 * Binary SSE operations on packed single precision floating point values.
4675 */
4676TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
4677
4678static const SSE_BINARY_R64_T g_aSseBinaryR64[] =
4679{
4680 ENTRY_BIN(addpd_u128),
4681 ENTRY_BIN(mulpd_u128),
4682 ENTRY_BIN(subpd_u128),
4683 ENTRY_BIN(minpd_u128),
4684 ENTRY_BIN(divpd_u128),
4685 ENTRY_BIN(maxpd_u128),
4686 ENTRY_BIN(haddpd_u128),
4687 ENTRY_BIN(hsubpd_u128),
4688 ENTRY_BIN(sqrtpd_u128),
4689 ENTRY_BIN(addsubpd_u128),
4690 ENTRY_BIN(cvtpd2ps_u128),
4691};
4692
4693#ifdef TSTIEMAIMPL_WITH_GENERATOR
4694static RTEXITCODE SseBinaryR64Generate(const char *pszDataFileFmt, uint32_t cTests)
4695{
4696 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
4697
4698 static struct { RTFLOAT64U aVal1[2], aVal2[2]; } const s_aSpecials[] =
4699 {
4700 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
4701 { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1), RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) } },
4702 /** @todo More specials. */
4703 };
4704
4705 X86FXSTATE State;
4706 RT_ZERO(State);
4707 uint32_t cMinNormalPairs = (cTests - 144) / 4;
4708 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
4709 {
4710 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR64[iFn].pfnNative ? g_aSseBinaryR64[iFn].pfnNative : g_aSseBinaryR64[iFn].pfn;
4711
4712 PRTSTREAM pStrmOut = NULL;
4713 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR64[iFn].pszName);
4714 if (RT_FAILURE(rc))
4715 {
4716 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR64[iFn].pszName, rc);
4717 return RTEXITCODE_FAILURE;
4718 }
4719
4720 uint32_t cNormalInputPairs = 0;
4721 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4722 {
4723 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
4724
4725 TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
4726 TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
4727 TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
4728 TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
4729
4730 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
4731 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
4732 cNormalInputPairs++;
4733 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4734 {
4735 iTest -= 1;
4736 continue;
4737 }
4738
4739 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
4740 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4741 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
4742 for (uint8_t iFz = 0; iFz < 2; iFz++)
4743 {
4744 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
4745 | (iRounding << X86_MXCSR_RC_SHIFT)
4746 | (iDaz ? X86_MXCSR_DAZ : 0)
4747 | (iFz ? X86_MXCSR_FZ : 0)
4748 | X86_MXCSR_XCPT_MASK;
4749 IEMSSERESULT ResM; RT_ZERO(ResM);
4750 pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
4751 TestData.fMxcsrIn = State.MXCSR;
4752 TestData.fMxcsrOut = ResM.MXCSR;
4753 TestData.OutVal = ResM.uResult;
4754 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4755
4756 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
4757 IEMSSERESULT ResU; RT_ZERO(ResU);
4758 pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
4759 TestData.fMxcsrIn = State.MXCSR;
4760 TestData.fMxcsrOut = ResU.MXCSR;
4761 TestData.OutVal = ResU.uResult;
4762 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4763
4764 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
4765 if (fXcpt)
4766 {
4767 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
4768 IEMSSERESULT Res1; RT_ZERO(Res1);
4769 pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
4770 TestData.fMxcsrIn = State.MXCSR;
4771 TestData.fMxcsrOut = Res1.MXCSR;
4772 TestData.OutVal = Res1.uResult;
4773 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4774
4775 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
4776 {
4777 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
4778 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
4779 IEMSSERESULT Res2; RT_ZERO(Res2);
4780 pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
4781 TestData.fMxcsrIn = State.MXCSR;
4782 TestData.fMxcsrOut = Res2.MXCSR;
4783 TestData.OutVal = Res2.uResult;
4784 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4785 }
4786 if (!RT_IS_POWER_OF_TWO(fXcpt))
4787 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
4788 if (fUnmasked & fXcpt)
4789 {
4790 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
4791 IEMSSERESULT Res3; RT_ZERO(Res3);
4792 pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
4793 TestData.fMxcsrIn = State.MXCSR;
4794 TestData.fMxcsrOut = Res3.MXCSR;
4795 TestData.OutVal = Res3.uResult;
4796 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4797 }
4798 }
4799 }
4800 }
4801 rc = RTStrmClose(pStrmOut);
4802 if (RT_FAILURE(rc))
4803 {
4804 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR64[iFn].pszName, rc);
4805 return RTEXITCODE_FAILURE;
4806 }
4807 }
4808
4809 return RTEXITCODE_SUCCESS;
4810}
4811#endif
4812
4813
/**
 * Tests the binary packed double-precision SSE workers in g_aSseBinaryR64
 * against the pre-generated test data.
 *
 * For each record the worker is fed the recorded MXCSR and the two 128-bit
 * inputs; the resulting MXCSR and both 64-bit result lanes must match the
 * recorded output bit-for-bit (RTFLOAT64U_ARE_IDENTICAL, so NaN encodings
 * count too).  When a native variation exists it is tested in a second pass.
 */
static void SseBinaryR64Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aSseBinaryR64[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aSseBinaryR64[iFn].pcTests; /* byte size of the data blob (divided by the record size below) */
        SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR64[iFn].paTests;
        PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR64[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
            {
                IEMSSERESULT Res; RT_ZERO(Res);

                State.MXCSR = paTests[iTest].fMxcsrIn;
                pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
                /* MXCSR and both result lanes must match exactly. */
                if (   Res.MXCSR != paTests[iTest].fMxcsrOut
                    || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
                    || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
                    RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s\n"
                                          "%s -> mxcsr=%#08x %s'%s\n"
                                          "%s expected %#08x %s'%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
                                 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
                                 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
                                 iVar ? "  " : "", Res.MXCSR,
                                 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
                                 iVar ? "  " : "", paTests[iTest].fMxcsrOut,
                                 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
                                 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
                                 (   !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
                                  || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
                                 ? " - val" : "",
                                 FormatMxcsr(paTests[iTest].fMxcsrIn) );
            }
            pfn = g_aSseBinaryR64[iFn].pfnNative; /* second variation runs the native worker */
        }
    }
}
4859
4860
4861/*
 * Binary SSE operations on scalar single precision floating point values (xxxss xmm1, r/m32).
4863 */
/* Subtest descriptor type for workers taking a 128-bit register and a 32-bit memory operand. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R32_T, SSE_BINARY_U128_R32_TEST_T, PFNIEMAIMPLFPSSEF2U128R32);

/* Scalar single-precision workers (xxxss xmm1, r/m32 forms) with binary test data files. */
static const SSE_BINARY_U128_R32_T g_aSseBinaryU128R32[] =
{
    ENTRY_BIN(addss_u128_r32),
    ENTRY_BIN(mulss_u128_r32),
    ENTRY_BIN(subss_u128_r32),
    ENTRY_BIN(minss_u128_r32),
    ENTRY_BIN(divss_u128_r32),
    ENTRY_BIN(maxss_u128_r32),
    ENTRY_BIN(cvtss2sd_u128_r32),
    ENTRY_BIN(sqrtss_u128_r32),
};
4877
4878#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the scalar single-precision SSE workers in
 * g_aSseBinaryU128R32 (addss, mulss, ..., i.e. the xmm1, r/m32 forms).
 *
 * One binary data file per worker is produced via @a pszDataFileFmt (a single
 * %s taking the worker name), holding SSE_BINARY_U128_R32_TEST_T records.
 * Every input set is run through all four rounding modes and all DAZ/FZ
 * combinations; when exception flags are raised, additional records probe
 * behaviour with exceptions unmasked.
 *
 * @returns RTEXITCODE_SUCCESS / RTEXITCODE_FAILURE (error already displayed).
 * @param   pszDataFileFmt  Format string producing the data file names.
 * @param   cTests          Number of random input sets to generate (min 192).
 */
static RTEXITCODE SseBinaryU128R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[4], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), }, RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4; /* guarantee a minimum number of all-normal input sets */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
    {
        /* Generate the reference data with the native (assembly) worker when available. */
        PFNIEMAIMPLFPSSEF2U128R32 const pfn = g_aSseBinaryU128R32[iFn].pfnNative ? g_aSseBinaryU128R32[iFn].pfnNative : g_aSseBinaryU128R32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryU128R32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryU128R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R32_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the hardcoded specials. */
            TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
            TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];

            TestData.r32Val2 = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].Val2;

            if (   RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
                && RTFLOAT32U_IS_NORMAL(&TestData.r32Val2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Running low on all-normal sets near the end of the budget: redo this slot. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResM; RT_ZERO(ResM);
                        pfn(&State, &ResM, &TestData.InVal1, &TestData.r32Val2);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResM.MXCSR;
                        TestData.OutVal    = ResM.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResU; RT_ZERO(ResU);
                        pfn(&State, &ResU, &TestData.InVal1, &TestData.r32Val2);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResU.MXCSR;
                        TestData.OutVal    = ResU.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: the flags that fired are set on input. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            IEMSSERESULT Res1; RT_ZERO(Res1);
                            pfn(&State, &Res1, &TestData.InVal1, &TestData.r32Val2);
                            TestData.fMxcsrIn  = State.MXCSR;
                            TestData.fMxcsrOut = Res1.MXCSR;
                            TestData.OutVal    = Res1.uResult;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* If new exception flags appeared, mask exactly the accumulated set and rerun. */
                            if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                IEMSSERESULT Res2; RT_ZERO(Res2);
                                pfn(&State, &Res2, &TestData.InVal1, &TestData.r32Val2);
                                TestData.fMxcsrIn  = State.MXCSR;
                                TestData.fMxcsrOut = Res2.MXCSR;
                                TestData.OutVal    = Res2.uResult;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                            }
                            /* When several exceptions fired, probe each one individually unmasked. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        IEMSSERESULT Res3; RT_ZERO(Res3);
                                        pfn(&State, &Res3, &TestData.InVal1, &TestData.r32Val2);
                                        TestData.fMxcsrIn  = State.MXCSR;
                                        TestData.fMxcsrOut = Res3.MXCSR;
                                        TestData.OutVal    = Res3.uResult;
                                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryU128R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
5000#endif
5001
5002static void SseBinaryU128R32Test(void)
5003{
5004 X86FXSTATE State;
5005 RT_ZERO(State);
5006 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
5007 {
5008 if (!SubTestAndCheckIfEnabled(g_aSseBinaryU128R32[iFn].pszName))
5009 continue;
5010
5011 uint32_t const cTests = *g_aSseBinaryU128R32[iFn].pcTests;
5012 SSE_BINARY_U128_R32_TEST_T const * const paTests = g_aSseBinaryU128R32[iFn].paTests;
5013 PFNIEMAIMPLFPSSEF2U128R32 pfn = g_aSseBinaryU128R32[iFn].pfn;
5014 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R32[iFn]);
5015 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5016 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5017 {
5018 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
5019 {
5020 IEMSSERESULT Res; RT_ZERO(Res);
5021
5022 State.MXCSR = paTests[iTest].fMxcsrIn;
5023 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r32Val2);
5024 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
5025 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
5026 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
5027 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
5028 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5029 || !fValsIdentical)
5030 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s\n"
5031 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
5032 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
5033 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5034 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
5035 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
5036 FormatR32(&paTests[iTest].r32Val2),
5037 iVar ? " " : "", Res.MXCSR,
5038 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
5039 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
5040 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5041 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
5042 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
5043 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5044 !fValsIdentical ? " - val" : "",
5045 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5046 }
5047 }
5048 }
5049}
5050
5051
5052/*
 * Binary SSE operations on scalar double precision floating point values (xxxsd xmm1, r/m64).
5054 */
/* Subtest descriptor type for workers taking a 128-bit register and a 64-bit memory operand. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R64_T, SSE_BINARY_U128_R64_TEST_T, PFNIEMAIMPLFPSSEF2U128R64);

/* Scalar double-precision workers (xxxsd xmm1, r/m64 forms) with binary test data files. */
static const SSE_BINARY_U128_R64_T g_aSseBinaryU128R64[] =
{
    ENTRY_BIN(addsd_u128_r64),
    ENTRY_BIN(mulsd_u128_r64),
    ENTRY_BIN(subsd_u128_r64),
    ENTRY_BIN(minsd_u128_r64),
    ENTRY_BIN(divsd_u128_r64),
    ENTRY_BIN(maxsd_u128_r64),
    ENTRY_BIN(cvtsd2ss_u128_r64),
    ENTRY_BIN(sqrtsd_u128_r64),
};
5068
5069#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the scalar double-precision SSE workers in
 * g_aSseBinaryU128R64 (addsd, mulsd, ..., i.e. the xmm1, r/m64 forms).
 *
 * One binary data file per worker is produced via @a pszDataFileFmt (a single
 * %s taking the worker name), holding SSE_BINARY_U128_R64_TEST_T records.
 * Every input set is run through all four rounding modes and all DAZ/FZ
 * combinations; when exception flags are raised, additional records probe
 * behaviour with exceptions unmasked.
 *
 * @returns RTEXITCODE_SUCCESS / RTEXITCODE_FAILURE (error already displayed).
 * @param   pszDataFileFmt  Format string producing the data file names.
 * @param   cTests          Number of random input sets to generate (min 192).
 */
static RTEXITCODE SseBinaryU128R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) }, RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4; /* guarantee a minimum number of all-normal input sets */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
    {
        /* Generate the reference data with the native (assembly) worker when available. */
        PFNIEMAIMPLFPSSEF2U128R64 const pfn = g_aSseBinaryU128R64[iFn].pfnNative ? g_aSseBinaryU128R64[iFn].pfnNative : g_aSseBinaryU128R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryU128R64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryU128R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R64_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, then the hardcoded specials. */
            TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.r64Val2 = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].Val2;

            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                && RTFLOAT64U_IS_NORMAL(&TestData.r64Val2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Running low on all-normal sets near the end of the budget: redo this slot. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exceptions masked. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResM; RT_ZERO(ResM);
                        pfn(&State, &ResM, &TestData.InVal1, &TestData.r64Val2);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResM.MXCSR;
                        TestData.OutVal    = ResM.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* Pass 2: all exceptions unmasked. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResU; RT_ZERO(ResU);
                        pfn(&State, &ResU, &TestData.InVal1, &TestData.r64Val2);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResU.MXCSR;
                        TestData.OutVal    = ResU.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Pass 3: the flags that fired are set on input. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            IEMSSERESULT Res1; RT_ZERO(Res1);
                            pfn(&State, &Res1, &TestData.InVal1, &TestData.r64Val2);
                            TestData.fMxcsrIn  = State.MXCSR;
                            TestData.fMxcsrOut = Res1.MXCSR;
                            TestData.OutVal    = Res1.uResult;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* If new exception flags appeared, mask exactly the accumulated set and rerun. */
                            if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                IEMSSERESULT Res2; RT_ZERO(Res2);
                                pfn(&State, &Res2, &TestData.InVal1, &TestData.r64Val2);
                                TestData.fMxcsrIn  = State.MXCSR;
                                TestData.fMxcsrOut = Res2.MXCSR;
                                TestData.OutVal    = Res2.uResult;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                            }
                            /* When several exceptions fired, probe each one individually unmasked. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        IEMSSERESULT Res3; RT_ZERO(Res3);
                                        pfn(&State, &Res3, &TestData.InVal1, &TestData.r64Val2);
                                        TestData.fMxcsrIn  = State.MXCSR;
                                        TestData.fMxcsrOut = Res3.MXCSR;
                                        TestData.OutVal    = Res3.uResult;
                                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryU128R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
5185#endif
5186
5187
5188static void SseBinaryU128R64Test(void)
5189{
5190 X86FXSTATE State;
5191 RT_ZERO(State);
5192 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
5193 {
5194 if (!SubTestAndCheckIfEnabled(g_aSseBinaryU128R64[iFn].pszName))
5195 continue;
5196
5197 uint32_t const cTests = *g_aSseBinaryU128R64[iFn].pcTests;
5198 SSE_BINARY_U128_R64_TEST_T const * const paTests = g_aSseBinaryU128R64[iFn].paTests;
5199 PFNIEMAIMPLFPSSEF2U128R64 pfn = g_aSseBinaryU128R64[iFn].pfn;
5200 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R64[iFn]);
5201 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5202 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5203 {
5204 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_U128_R64_TEST_T); iTest++)
5205 {
5206 IEMSSERESULT Res; RT_ZERO(Res);
5207
5208 State.MXCSR = paTests[iTest].fMxcsrIn;
5209 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r64Val2);
5210 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5211 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5212 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5213 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s\n"
5214 "%s -> mxcsr=%#08x %s'%s\n"
5215 "%s expected %#08x %s'%s%s%s (%s)\n",
5216 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5217 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
5218 FormatR64(&paTests[iTest].r64Val2),
5219 iVar ? " " : "", Res.MXCSR,
5220 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
5221 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5222 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
5223 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5224 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5225 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5226 ? " - val" : "",
5227 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5228 }
5229 }
5230 }
5231}
5232
5233
5234
5235int main(int argc, char **argv)
5236{
5237 int rc = RTR3InitExe(argc, &argv, 0);
5238 if (RT_FAILURE(rc))
5239 return RTMsgInitFailure(rc);
5240
5241 /*
     * Determine the host CPU.
5243 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
5244 */
5245#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
5246 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
5247 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
5248 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
5249#else
5250 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
5251#endif
5252
5253 /*
5254 * Parse arguments.
5255 */
5256 enum { kModeNotSet, kModeTest, kModeGenerate }
5257 enmMode = kModeNotSet;
5258 bool fInt = true;
5259 bool fFpuLdSt = true;
5260 bool fFpuBinary1 = true;
5261 bool fFpuBinary2 = true;
5262 bool fFpuOther = true;
5263 bool fCpuData = true;
5264 bool fCommonData = true;
5265 bool fSseFpBinary = true;
5266 uint32_t const cDefaultTests = 96;
5267 uint32_t cTests = cDefaultTests;
5268 RTGETOPTDEF const s_aOptions[] =
5269 {
5270 // mode:
5271 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
5272 { "--test", 't', RTGETOPT_REQ_NOTHING },
5273 // test selection (both)
5274 { "--all", 'a', RTGETOPT_REQ_NOTHING },
5275 { "--none", 'z', RTGETOPT_REQ_NOTHING },
5276 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
5277 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
5278 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
5279 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
5280 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
5281 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
5282 { "--sse-fp-binary", 'S', RTGETOPT_REQ_NOTHING },
5283 { "--int", 'i', RTGETOPT_REQ_NOTHING },
5284 { "--include", 'I', RTGETOPT_REQ_STRING },
5285 { "--exclude", 'X', RTGETOPT_REQ_STRING },
5286 // generation parameters
5287 { "--common", 'm', RTGETOPT_REQ_NOTHING },
5288 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
5289 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
5290 { "--verbose", 'v', RTGETOPT_REQ_NOTHING },
5291 { "--quiet", 'q', RTGETOPT_REQ_NOTHING },
5292 };
5293
5294 RTGETOPTSTATE State;
5295 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
5296 AssertRCReturn(rc, RTEXITCODE_FAILURE);
5297
5298 RTGETOPTUNION ValueUnion;
5299 while ((rc = RTGetOpt(&State, &ValueUnion)))
5300 {
5301 switch (rc)
5302 {
5303 case 'g':
5304 enmMode = kModeGenerate;
5305 break;
5306 case 't':
5307 enmMode = kModeTest;
5308 break;
5309
5310 case 'a':
5311 fCpuData = true;
5312 fCommonData = true;
5313 fInt = true;
5314 fFpuLdSt = true;
5315 fFpuBinary1 = true;
5316 fFpuBinary2 = true;
5317 fFpuOther = true;
5318 fSseFpBinary = true;
5319 break;
5320 case 'z':
5321 fCpuData = false;
5322 fCommonData = false;
5323 fInt = false;
5324 fFpuLdSt = false;
5325 fFpuBinary1 = false;
5326 fFpuBinary2 = false;
5327 fFpuOther = false;
5328 fSseFpBinary = false;
5329 break;
5330
5331 case 'F':
5332 fFpuLdSt = true;
5333 break;
5334 case 'O':
5335 fFpuOther = true;
5336 break;
5337 case 'B':
5338 fFpuBinary1 = true;
5339 break;
5340 case 'P':
5341 fFpuBinary2 = true;
5342 break;
5343 case 'S':
5344 fSseFpBinary = true;
5345 break;
5346 case 'i':
5347 fInt = true;
5348 break;
5349
5350 case 'I':
5351 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
5352 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
5353 RT_ELEMENTS(g_apszIncludeTestPatterns));
5354 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
5355 break;
5356 case 'X':
5357 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
5358 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
5359 RT_ELEMENTS(g_apszExcludeTestPatterns));
5360 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
5361 break;
5362
5363 case 'm':
5364 fCommonData = true;
5365 break;
5366 case 'c':
5367 fCpuData = true;
5368 break;
5369 case 'n':
5370 cTests = ValueUnion.u32;
5371 break;
5372
5373 case 'q':
5374 g_cVerbosity = 0;
5375 break;
5376 case 'v':
5377 g_cVerbosity++;
5378 break;
5379
5380 case 'h':
5381 RTPrintf("usage: %s <-g|-t> [options]\n"
5382 "\n"
5383 "Mode:\n"
5384 " -g, --generate\n"
5385 " Generate test data.\n"
5386 " -t, --test\n"
5387 " Execute tests.\n"
5388 "\n"
5389 "Test selection (both modes):\n"
5390 " -a, --all\n"
5391 " Enable all tests and generated test data. (default)\n"
5392 " -z, --zap, --none\n"
5393 " Disable all tests and test data types.\n"
5394 " -i, --int\n"
5395 " Enable non-FPU tests.\n"
5396 " -F, --fpu-ld-st\n"
5397 " Enable FPU load and store tests.\n"
5398 " -B, --fpu-binary-1\n"
5399 " Enable FPU binary 80-bit FP tests.\n"
5400 " -P, --fpu-binary-2\n"
5401 " Enable FPU binary 64- and 32-bit FP tests.\n"
5402 " -O, --fpu-other\n"
5403 " Enable FPU binary 64- and 32-bit FP tests.\n"
5404 " -S, --sse-fp-binary\n"
5405 " Enable SSE binary 64- and 32-bit FP tests.\n"
5406 " -I,--include=<test-patter>\n"
5407 " Enable tests matching the given pattern.\n"
5408 " -X,--exclude=<test-patter>\n"
5409 " Skip tests matching the given pattern (overrides --include).\n"
5410 "\n"
5411 "Generation:\n"
5412 " -m, --common\n"
5413 " Enable generating common test data.\n"
5414 " -c, --only-cpu\n"
5415 " Enable generating CPU specific test data.\n"
5416 " -n, --number-of-test <count>\n"
5417 " Number of tests to generate. Default: %u\n"
5418 "\n"
5419 "Other:\n"
5420 " -v, --verbose\n"
5421 " -q, --quiet\n"
5422 " Noise level. Default: --quiet\n"
5423 , argv[0], cDefaultTests);
5424 return RTEXITCODE_SUCCESS;
5425 default:
5426 return RTGetOptPrintError(rc, &ValueUnion);
5427 }
5428 }
5429
5430 /*
5431 * Generate data?
5432 */
5433 if (enmMode == kModeGenerate)
5434 {
5435#ifdef TSTIEMAIMPL_WITH_GENERATOR
5436 char szCpuDesc[256] = {0};
5437 RTMpGetDescription(NIL_RTCPUID, szCpuDesc, sizeof(szCpuDesc));
5438 const char * const pszCpuType = g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD ? "Amd" : "Intel";
5439# if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
5440 const char * const pszBitBucket = "NUL";
5441# else
5442 const char * const pszBitBucket = "/dev/null";
5443# endif
5444
5445 if (cTests == 0)
5446 cTests = cDefaultTests;
5447 g_cZeroDstTests = RT_MIN(cTests / 16, 32);
5448 g_cZeroSrcTests = g_cZeroDstTests * 2;
5449
5450 if (fInt)
5451 {
5452 const char *pszDataFile = fCommonData ? "tstIEMAImplDataInt.cpp" : pszBitBucket;
5453 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5454 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5455 ? "tstIEMAImplDataInt-Amd.cpp" : "tstIEMAImplDataInt-Intel.cpp";
5456 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5457 if (!pStrmData || !pStrmDataCpu)
5458 return RTEXITCODE_FAILURE;
5459
5460 BinU8Generate( pStrmData, pStrmDataCpu, cTests);
5461 BinU16Generate(pStrmData, pStrmDataCpu, cTests);
5462 BinU32Generate(pStrmData, pStrmDataCpu, cTests);
5463 BinU64Generate(pStrmData, pStrmDataCpu, cTests);
5464 ShiftDblGenerate(pStrmDataCpu, RT_MAX(cTests, 128));
5465 UnaryGenerate(pStrmData, cTests);
5466 ShiftGenerate(pStrmDataCpu, cTests);
5467 MulDivGenerate(pStrmDataCpu, cTests);
5468
5469 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5470 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5471 if (rcExit != RTEXITCODE_SUCCESS)
5472 return rcExit;
5473 }
5474
5475 if (fFpuLdSt)
5476 {
5477 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuLdSt.cpp" : pszBitBucket;
5478 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5479 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5480 ? "tstIEMAImplDataFpuLdSt-Amd.cpp" : "tstIEMAImplDataFpuLdSt-Intel.cpp";
5481 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5482 if (!pStrmData || !pStrmDataCpu)
5483 return RTEXITCODE_FAILURE;
5484
5485 FpuLdConstGenerate(pStrmData, cTests);
5486 FpuLdIntGenerate(pStrmData, cTests);
5487 FpuLdD80Generate(pStrmData, cTests);
5488 FpuStIntGenerate(pStrmData, pStrmDataCpu, cTests);
5489 FpuStD80Generate(pStrmData, cTests);
5490 uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
5491 FpuLdMemGenerate(pStrmData, cTests2);
5492 FpuStMemGenerate(pStrmData, cTests2);
5493
5494 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5495 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5496 if (rcExit != RTEXITCODE_SUCCESS)
5497 return rcExit;
5498 }
5499
5500 if (fFpuBinary1)
5501 {
5502 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary1.cpp" : pszBitBucket;
5503 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5504 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5505 ? "tstIEMAImplDataFpuBinary1-Amd.cpp" : "tstIEMAImplDataFpuBinary1-Intel.cpp";
5506 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5507 if (!pStrmData || !pStrmDataCpu)
5508 return RTEXITCODE_FAILURE;
5509
5510 FpuBinaryR80Generate(pStrmData, pStrmDataCpu, cTests);
5511 FpuBinaryFswR80Generate(pStrmData, cTests);
5512 FpuBinaryEflR80Generate(pStrmData, cTests);
5513
5514 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5515 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5516 if (rcExit != RTEXITCODE_SUCCESS)
5517 return rcExit;
5518 }
5519
5520 if (fFpuBinary2)
5521 {
5522 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary2.cpp" : pszBitBucket;
5523 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5524 const char *pszDataCpuFile = pszBitBucket; /*!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5525 ? "tstIEMAImplDataFpuBinary2-Amd.cpp" : "tstIEMAImplDataFpuBinary2-Intel.cpp"; */
5526 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5527 if (!pStrmData || !pStrmDataCpu)
5528 return RTEXITCODE_FAILURE;
5529
5530 FpuBinaryR64Generate(pStrmData, cTests);
5531 FpuBinaryR32Generate(pStrmData, cTests);
5532 FpuBinaryI32Generate(pStrmData, cTests);
5533 FpuBinaryI16Generate(pStrmData, cTests);
5534 FpuBinaryFswR64Generate(pStrmData, cTests);
5535 FpuBinaryFswR32Generate(pStrmData, cTests);
5536 FpuBinaryFswI32Generate(pStrmData, cTests);
5537 FpuBinaryFswI16Generate(pStrmData, cTests);
5538
5539 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5540 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5541 if (rcExit != RTEXITCODE_SUCCESS)
5542 return rcExit;
5543 }
5544
5545 if (fFpuOther)
5546 {
5547 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuOther.cpp" : pszBitBucket;
5548 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
5549 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
5550 ? "tstIEMAImplDataFpuOther-Amd.cpp" : "tstIEMAImplDataFpuOther-Intel.cpp";
5551 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
5552 if (!pStrmData || !pStrmDataCpu)
5553 return RTEXITCODE_FAILURE;
5554
5555 FpuUnaryR80Generate(pStrmData, pStrmDataCpu, cTests);
5556 FpuUnaryFswR80Generate(pStrmData, pStrmDataCpu, cTests);
5557 FpuUnaryTwoR80Generate(pStrmData, pStrmDataCpu, cTests);
5558
5559 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
5560 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
5561 if (rcExit != RTEXITCODE_SUCCESS)
5562 return rcExit;
5563 }
5564
5565 if (fSseFpBinary)
5566 {
5567 const char *pszDataFileFmt = fCommonData ? "tstIEMAImplDataSseBinary-%s.bin" : pszBitBucket;
5568
5569 RTEXITCODE rcExit = SseBinaryR32Generate(pszDataFileFmt, cTests);
5570 if (rcExit == RTEXITCODE_SUCCESS)
5571 rcExit = SseBinaryR64Generate(pszDataFileFmt, cTests);
5572 if (rcExit == RTEXITCODE_SUCCESS)
5573 rcExit = SseBinaryU128R32Generate(pszDataFileFmt, cTests);
5574 if (rcExit == RTEXITCODE_SUCCESS)
5575 rcExit = SseBinaryU128R64Generate(pszDataFileFmt, cTests);
5576 if (rcExit != RTEXITCODE_SUCCESS)
5577 return rcExit;
5578 }
5579
5580 return RTEXITCODE_SUCCESS;
5581#else
5582 return RTMsgErrorExitFailure("Test data generator not compiled in!");
5583#endif
5584 }
5585
5586 /*
5587      * Do testing. Currently disabled by default as data needs to be checked
5588      * on both Intel and AMD systems first.
5589 */
5590 rc = RTTestCreate("tstIEMAimpl", &g_hTest);
5591 AssertRCReturn(rc, RTEXITCODE_FAILURE);
5592 if (enmMode == kModeTest)
5593 {
5594 RTTestBanner(g_hTest);
5595
5596 /* Allocate guarded memory for use in the tests. */
5597#define ALLOC_GUARDED_VAR(a_puVar) do { \
5598 rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
5599 if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
5600 } while (0)
5601 ALLOC_GUARDED_VAR(g_pu8);
5602 ALLOC_GUARDED_VAR(g_pu16);
5603 ALLOC_GUARDED_VAR(g_pu32);
5604 ALLOC_GUARDED_VAR(g_pu64);
5605 ALLOC_GUARDED_VAR(g_pu128);
5606 ALLOC_GUARDED_VAR(g_pu8Two);
5607 ALLOC_GUARDED_VAR(g_pu16Two);
5608 ALLOC_GUARDED_VAR(g_pu32Two);
5609 ALLOC_GUARDED_VAR(g_pu64Two);
5610 ALLOC_GUARDED_VAR(g_pu128Two);
5611 ALLOC_GUARDED_VAR(g_pfEfl);
5612 if (RTTestErrorCount(g_hTest) == 0)
5613 {
5614 if (fInt)
5615 {
5616 BinU8Test();
5617 BinU16Test();
5618 BinU32Test();
5619 BinU64Test();
5620 XchgTest();
5621 XaddTest();
5622 CmpXchgTest();
5623 CmpXchg8bTest();
5624 CmpXchg16bTest();
5625 ShiftDblTest();
5626 UnaryTest();
5627 ShiftTest();
5628 MulDivTest();
5629 BswapTest();
5630 }
5631
5632 if (fFpuLdSt)
5633 {
5634 FpuLoadConstTest();
5635 FpuLdMemTest();
5636 FpuLdIntTest();
5637 FpuLdD80Test();
5638 FpuStMemTest();
5639 FpuStIntTest();
5640 FpuStD80Test();
5641 }
5642
5643 if (fFpuBinary1)
5644 {
5645 FpuBinaryR80Test();
5646 FpuBinaryFswR80Test();
5647 FpuBinaryEflR80Test();
5648 }
5649
5650 if (fFpuBinary2)
5651 {
5652 FpuBinaryR64Test();
5653 FpuBinaryR32Test();
5654 FpuBinaryI32Test();
5655 FpuBinaryI16Test();
5656 FpuBinaryFswR64Test();
5657 FpuBinaryFswR32Test();
5658 FpuBinaryFswI32Test();
5659 FpuBinaryFswI16Test();
5660 }
5661
5662 if (fFpuOther)
5663 {
5664 FpuUnaryR80Test();
5665 FpuUnaryFswR80Test();
5666 FpuUnaryTwoR80Test();
5667 }
5668
5669 if (fSseFpBinary)
5670 {
5671 SseBinaryR32Test();
5672 SseBinaryR64Test();
5673 SseBinaryU128R32Test();
5674 SseBinaryU128R64Test();
5675 }
5676 }
5677 return RTTestSummaryAndDestroy(g_hTest);
5678 }
5679 return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
5680}
5681
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette