tstIEMAImpl.cpp@ 94439

Last change on this file since 94439 was 94423, checked in by vboxsync, 3 years ago
tstIEMAImpl: More tests where AMD and Intel differs a little (or a lot). bugref:9898
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 176.4 KB

Line
1	/* $Id: tstIEMAImpl.cpp 94423 2022-03-31 22:59:46Z vboxsync $ */
2	/** @file
3	* IEM Assembly Instruction Helper Testcase.
4	*/
5
6	/*
7	* Copyright (C) 2022 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*/
17
18
19	/*********************************************************************************************************************************
20	* Header Files *
21	*********************************************************************************************************************************/
22	#include "../include/IEMInternal.h"
23
24	#include <iprt/errcore.h>
25	#include <VBox/log.h>
26	#include <iprt/assert.h>
27	#include <iprt/ctype.h>
28	#include <iprt/getopt.h>
29	#include <iprt/initterm.h>
30	#include <iprt/message.h>
31	#include <iprt/mp.h>
32	#include <iprt/rand.h>
33	#include <iprt/stream.h>
34	#include <iprt/string.h>
35	#include <iprt/test.h>
36
37	#include "tstIEMAImpl.h"
38
39
40	/*********************************************************************************************************************************
41	* Defined Constants And Macros *
42	*********************************************************************************************************************************/
/** Sub-test table initializer for helper @a a_Name with the extra value set to 0. */
#define ENTRY(a_Name) \
    ENTRY_EX(a_Name, 0)

/**
 * Sub-test table initializer for a helper without a CPU specific variant.
 *
 * Expands to, in member order: the stringified name, iemAImpl_<a_Name> as the
 * function under test, NULL for the native function, the g_aTests_<a_Name>
 * array, the address of g_cTests_<a_Name>, @a a_uExtra and
 * IEMTARGETCPU_EFL_BEHAVIOR_NATIVE.
 */
#define ENTRY_EX(a_Name, a_uExtra) \
    { \
        RT_XSTR(a_Name), \
        iemAImpl_ ## a_Name, \
        NULL, \
        g_aTests_ ## a_Name, \
        &g_cTests_ ## a_Name, \
        a_uExtra, \
        IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ \
    }
48
/** Intel flavoured sub-test table initializer with the extra value set to 0. */
#define ENTRY_INTEL(a_Name, a_fEflUndef) \
    ENTRY_INTEL_EX(a_Name, a_fEflUndef, 0)

/**
 * Sub-test table initializer for the "_intel" variant of a helper.
 *
 * The entry tests iemAImpl_<a_Name>_intel against the g_aTests_<a_Name>_intel
 * data and records the unsuffixed iemAImpl_<a_Name> as the native function.
 * Note that @a a_fEflUndef is accepted but does not appear in the expansion.
 */
#define ENTRY_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
    { \
        RT_XSTR(a_Name) "_intel", \
        iemAImpl_ ## a_Name ## _intel, \
        iemAImpl_ ## a_Name, \
        g_aTests_ ## a_Name ## _intel, \
        &g_cTests_ ## a_Name ## _intel, \
        a_uExtra, \
        IEMTARGETCPU_EFL_BEHAVIOR_INTEL \
    }
54
/** AMD flavoured sub-test table initializer with the extra value set to 0. */
#define ENTRY_AMD(a_Name, a_fEflUndef) \
    ENTRY_AMD_EX(a_Name, a_fEflUndef, 0)

/**
 * Sub-test table initializer for the "_amd" variant of a helper.
 *
 * The entry tests iemAImpl_<a_Name>_amd against the g_aTests_<a_Name>_amd
 * data and records the unsuffixed iemAImpl_<a_Name> as the native function.
 * Note that @a a_fEflUndef is accepted but does not appear in the expansion.
 */
#define ENTRY_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
    { \
        RT_XSTR(a_Name) "_amd", \
        iemAImpl_ ## a_Name ## _amd, \
        iemAImpl_ ## a_Name, \
        g_aTests_ ## a_Name ## _amd, \
        &g_cTests_ ## a_Name ## _amd, \
        a_uExtra, \
        IEMTARGETCPU_EFL_BEHAVIOR_AMD \
    }
60
/**
 * Declares the sub-test descriptor structure @a a_TypeName.
 *
 * The member order must match the initializers produced by the ENTRY*
 * macros.
 *
 * @param   a_TypeName          Name of the struct and typedef to create.
 * @param   a_TestType          Type of a single test data record.
 * @param   a_FunctionPtrType   Pointer type of the helper being tested.
 */
#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
    typedef struct a_TypeName \
    { \
        const char         *pszName;            /* sub-test name */ \
        a_FunctionPtrType   pfn;                /* function under test */ \
        a_FunctionPtrType   pfnNative;          /* native function, may be NULL */ \
        a_TestType const   *paTests;            /* test data array */ \
        uint32_t const     *pcTests;            /* points to the test data count */ \
        uint32_t            uExtra;             /* extra value from the ENTRY*_EX macros */ \
        uint8_t             idxCpuEflFlavour;   /* IEMTARGETCPU_EFL_BEHAVIOR_XXX */ \
    } a_TypeName
72
/**
 * Number of function variations to run for a sub-test entry: 2 when the
 * entry's EFLAGS flavour equals g_idxCpuEflFlavour and it has a native
 * function pointer, otherwise 1.
 */
#define COUNT_VARIATIONS(a_SubTest) \
    ( (a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative ? 2 : 1 )
75
76
77	/*********************************************************************************************************************************
78	* Global Variables *
79	*********************************************************************************************************************************/
80	static RTTEST g_hTest;
81	static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
82	#ifdef TSTIEMAIMPL_WITH_GENERATOR
83	static uint32_t g_cZeroDstTests = 2;
84	static uint32_t g_cZeroSrcTests = 4;
85	#endif
86	static uint8_t g_pu8, g_pu8Two;
87	static uint16_t g_pu16, g_pu16Two;
88	static uint32_t g_pu32, g_pu32Two, *g_pfEfl;
89	static uint64_t g_pu64, g_pu64Two;
90	static RTUINT128U g_pu128, g_pu128Two;
91
92	static char g_aszBuf[16][256];
93	static unsigned g_idxBuf = 0;
94
95
96	/*********************************************************************************************************************************
97	* Internal Functions *
98	*********************************************************************************************************************************/
99	static const char *FormatR80(PCRTFLOAT80U pr80);
100	static const char *FormatR64(PCRTFLOAT64U pr64);
101	static const char *FormatR32(PCRTFLOAT32U pr32);
102
103
/*
 * Random helpers.
 */
107
108	static uint32_t RandEFlags(void)
109	{
110	uint32_t fEfl = RTRandU32();
111	return (fEfl & X86_EFL_LIVE_MASK) \| X86_EFL_RA1_MASK;
112	}
113
114	#ifdef TSTIEMAIMPL_WITH_GENERATOR
115
116	static uint8_t RandU8(void)
117	{
118	return RTRandU32Ex(0, 0xff);
119	}
120
121
122	static uint16_t RandU16(void)
123	{
124	return RTRandU32Ex(0, 0xffff);
125	}
126
127
128	static uint32_t RandU32(void)
129	{
130	return RTRandU32();
131	}
132
133	#endif
134
135	static uint64_t RandU64(void)
136	{
137	return RTRandU64();
138	}
139
140
141	static RTUINT128U RandU128(void)
142	{
143	RTUINT128U Ret;
144	Ret.s.Hi = RTRandU64();
145	Ret.s.Lo = RTRandU64();
146	return Ret;
147	}
148
149	#ifdef TSTIEMAIMPL_WITH_GENERATOR
150
151	static uint8_t RandU8Dst(uint32_t iTest)
152	{
153	if (iTest < g_cZeroDstTests)
154	return 0;
155	return RandU8();
156	}
157
158
159	static uint8_t RandU8Src(uint32_t iTest)
160	{
161	if (iTest < g_cZeroSrcTests)
162	return 0;
163	return RandU8();
164	}
165
166
167	static uint16_t RandU16Dst(uint32_t iTest)
168	{
169	if (iTest < g_cZeroDstTests)
170	return 0;
171	return RandU16();
172	}
173
174
175	static uint16_t RandU16Src(uint32_t iTest)
176	{
177	if (iTest < g_cZeroSrcTests)
178	return 0;
179	return RandU16();
180	}
181
182
183	static uint32_t RandU32Dst(uint32_t iTest)
184	{
185	if (iTest < g_cZeroDstTests)
186	return 0;
187	return RandU32();
188	}
189
190
191	static uint32_t RandU32Src(uint32_t iTest)
192	{
193	if (iTest < g_cZeroSrcTests)
194	return 0;
195	return RandU32();
196	}
197
198
199	static uint64_t RandU64Dst(uint32_t iTest)
200	{
201	if (iTest < g_cZeroDstTests)
202	return 0;
203	return RandU64();
204	}
205
206
207	static uint64_t RandU64Src(uint32_t iTest)
208	{
209	if (iTest < g_cZeroSrcTests)
210	return 0;
211	return RandU64();
212	}
213
214
215	static int16_t RandI16Src(uint32_t iTest)
216	{
217	RT_NOREF(iTest);
218	return (int16_t)RandU16();
219	}
220
221
222	static int32_t RandI32Src(uint32_t iTest)
223	{
224	RT_NOREF(iTest);
225	return (int32_t)RandU32();
226	}
227
228
#if 0
/**
 * Random signed 64-bit source operand (currently compiled out).
 *
 * @returns RandU64() cast to int64_t.
 * @param   iTest   The test number (not used).
 */
static int64_t RandI64Src(uint32_t iTest)
{
    RT_NOREF(iTest);
    uint64_t const uRaw = RandU64();
    return (int64_t)uRaw;
}
#endif
236
237
238	static uint16_t RandFcw(void)
239	{
240	return RandU16() & ~X86_FCW_ZERO_MASK;
241	}
242
243
244	static uint16_t RandFsw(void)
245	{
246	AssertCompile((X86_FSW_C_MASK \| X86_FSW_XCPT_ES_MASK \| X86_FSW_TOP_MASK \| X86_FSW_B) == 0xffff);
247	return RandU16();
248	}
249
250
251	static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
252	{
253	if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
254	pr80->sj64.uFraction >>= cShift;
255	else
256	pr80->sj64.uFraction = (cShift % 19) + 1;
257	}
258
259
260	static RTFLOAT80U RandR80Ex(unsigned cTarget = 80, bool fIntTarget = false)
261	{
262	Assert(cTarget == (!fIntTarget ? 80U : 16U) \|\| cTarget == 64U \|\| cTarget == 32U \|\| (cTarget == 59U && fIntTarget));
263
264	RTFLOAT80U r80;
265	r80.au64[0] = RandU64();
266	r80.au16[4] = RandU16();
267
268	/*
269	* Make it more likely that we get a good selection of special values.
270	*/
271	uint8_t bType = RandU8() & 0x1f;
272	if (bType == 0 \|\| bType == 1 \|\| bType == 2 \|\| bType == 3)
273	{
274	/* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
275	r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
276	r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
277	r80.sj64.fInteger = bType >= 2 ? 1 : 0;
278	AssertMsg(bType != 0 \|\| RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
279	AssertMsg(bType != 1 \|\| RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
280	AssertMsg(bType != 2 \|\| RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
281	AssertMsg(bType != 3 \|\| RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
282	}
283	else if (bType == 4 \|\| bType == 5 \|\| bType == 6 \|\| bType == 7)
284	{
285	/* Denormals (4,5) and Pseudo denormals (6,7) */
286	if (bType & 1)
287	SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
288	else if (r80.sj64.uFraction == 0 && bType < 6)
289	r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
290	r80.sj64.uExponent = 0;
291	r80.sj64.fInteger = bType >= 6;
292	AssertMsg(bType >= 6 \|\| RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
293	AssertMsg(bType < 6 \|\| RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
294	}
295	else if (bType == 8 \|\| bType == 9)
296	{
297	/* Pseudo NaN. */
298	if (bType & 1)
299	SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
300	else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
301	r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
302	r80.sj64.uExponent = 0x7fff;
303	if (r80.sj64.fInteger)
304	r80.sj64.uFraction \|= RT_BIT_64(62);
305	else
306	r80.sj64.uFraction &= ~RT_BIT_64(62);
307	r80.sj64.fInteger = 0;
308	AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
309	AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
310	}
311	else if (bType == 10 \|\| bType == 11)
312	{
313	/* Quiet and signalling NaNs (using fInteger to pick which). */
314	if (bType & 1)
315	SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
316	else if (r80.sj64.uFraction == 0)
317	r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
318	r80.sj64.uExponent = 0x7fff;
319	if (r80.sj64.fInteger)
320	r80.sj64.uFraction \|= RT_BIT_64(62);
321	else
322	r80.sj64.uFraction &= ~RT_BIT_64(62);
323	r80.sj64.fInteger = 1;
324	AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) \|\| RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
325	AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
326	}
327	else if (bType == 12 \|\| bType == 13)
328	{
329	/* Unnormals */
330	if (bType & 1)
331	SafeR80FractionShift(&r80, RandU8() % 62);
332	r80.sj64.fInteger = 0;
333	if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX \|\| r80.sj64.uExponent == 0)
334	r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
335	AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
336	}
337	else if (bType < 24)
338	{
339	/* Make sure we have lots of normalized values. */
340	if (!fIntTarget)
341	{
342	const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
343	: cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
344	const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
345	: cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
346	r80.sj64.fInteger = 1;
347	if (r80.sj64.uExponent <= uMinExp)
348	r80.sj64.uExponent = uMinExp + 1;
349	else if (r80.sj64.uExponent >= uMaxExp)
350	r80.sj64.uExponent = uMaxExp - 1;
351
352	if (bType == 14)
353	{ /* All 1s is useful to testing rounding. Also try trigger special
354	behaviour by sometimes rounding out of range, while we're at it. */
355	r80.sj64.uFraction = RT_BIT_64(63) - 1;
356	uint8_t bExp = RandU8();
357	if ((bExp & 3) == 0)
358	r80.sj64.uExponent = uMaxExp - 1;
359	else if ((bExp & 3) == 1)
360	r80.sj64.uExponent = uMinExp + 1;
361	else if ((bExp & 3) == 2)
362	r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
363	}
364	}
365	else
366	{
367	/* integer target: */
368	const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
369	const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
370	r80.sj64.fInteger = 1;
371	if (r80.sj64.uExponent < uMinExp)
372	r80.sj64.uExponent = uMinExp;
373	else if (r80.sj64.uExponent > uMaxExp)
374	r80.sj64.uExponent = uMaxExp;
375
376	if (bType == 14)
377	{ /* All 1s is useful to testing rounding. Also try trigger special
378	behaviour by sometimes rounding out of range, while we're at it. */
379	r80.sj64.uFraction = RT_BIT_64(63) - 1;
380	uint8_t bExp = RandU8();
381	if ((bExp & 3) == 0)
382	r80.sj64.uExponent = uMaxExp;
383	else if ((bExp & 3) == 1)
384	r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
385	}
386	}
387
388	AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
389	}
390	return r80;
391	}
392
393
394	static RTFLOAT80U RandR80Src(uint32_t iTest)
395	{
396	RT_NOREF(iTest);
397	return RandR80Ex();
398	}
399
400
401	static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
402	{
403	if (pr64->s64.uFraction >= RT_BIT_64(cShift))
404	pr64->s64.uFraction >>= cShift;
405	else
406	pr64->s64.uFraction = (cShift % 19) + 1;
407	}
408
409
410	static RTFLOAT64U RandR64Src(uint32_t iTest)
411	{
412	RT_NOREF(iTest);
413
414	RTFLOAT64U r64;
415	r64.u = RandU64();
416
417	/*
418	* Make it more likely that we get a good selection of special values.
419	* On average 6 out of 16 calls should return a special value.
420	*/
421	uint8_t bType = RandU8() & 0xf;
422	if (bType == 0 \|\| bType == 1)
423	{
424	/* 0 or Infinity. We only keep fSign here. */
425	r64.s.uExponent = bType == 0 ? 0 : 0x7ff;
426	r64.s.uFractionHigh = 0;
427	r64.s.uFractionLow = 0;
428	AssertMsg(bType != 0 \|\| RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
429	AssertMsg(bType != 1 \|\| RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
430	}
431	else if (bType == 2 \|\| bType == 3)
432	{
433	/* Subnormals */
434	if (bType == 3)
435	SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
436	else if (r64.s64.uFraction == 0)
437	r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
438	r64.s64.uExponent = 0;
439	AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
440	}
441	else if (bType == 4 \|\| bType == 5)
442	{
443	/* NaNs */
444	if (bType == 5)
445	SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
446	else if (r64.s64.uFraction == 0)
447	r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
448	r64.s64.uExponent = 0x7ff;
449	AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
450	}
451	else if (bType < 12)
452	{
453	/* Make sure we have lots of normalized values. */
454	if (r64.s.uExponent == 0)
455	r64.s.uExponent = 1;
456	else if (r64.s.uExponent == 0x7ff)
457	r64.s.uExponent = 0x7fe;
458	AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
459	}
460	return r64;
461	}
462
463
464	static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
465	{
466	if (pr32->s.uFraction >= RT_BIT_32(cShift))
467	pr32->s.uFraction >>= cShift;
468	else
469	pr32->s.uFraction = (cShift % 19) + 1;
470	}
471
472
473	static RTFLOAT32U RandR32Src(uint32_t iTest)
474	{
475	RT_NOREF(iTest);
476
477	RTFLOAT32U r32;
478	r32.u = RandU32();
479
480	/*
481	* Make it more likely that we get a good selection of special values.
482	* On average 6 out of 16 calls should return a special value.
483	*/
484	uint8_t bType = RandU8() & 0xf;
485	if (bType == 0 \|\| bType == 1)
486	{
487	/* 0 or Infinity. We only keep fSign here. */
488	r32.s.uExponent = bType == 0 ? 0 : 0xff;
489	r32.s.uFraction = 0;
490	AssertMsg(bType != 0 \|\| RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
491	AssertMsg(bType != 1 \|\| RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
492	}
493	else if (bType == 2 \|\| bType == 3)
494	{
495	/* Subnormals */
496	if (bType == 3)
497	SafeR32FractionShift(&r32, r32.s.uExponent % 22);
498	else if (r32.s.uFraction == 0)
499	r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
500	r32.s.uExponent = 0;
501	AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
502	}
503	else if (bType == 4 \|\| bType == 5)
504	{
505	/* NaNs */
506	if (bType == 5)
507	SafeR32FractionShift(&r32, r32.s.uExponent % 22);
508	else if (r32.s.uFraction == 0)
509	r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
510	r32.s.uExponent = 0xff;
511	AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
512	}
513	else if (bType < 12)
514	{
515	/* Make sure we have lots of normalized values. */
516	if (r32.s.uExponent == 0)
517	r32.s.uExponent = 1;
518	else if (r32.s.uExponent == 0xff)
519	r32.s.uExponent = 0xfe;
520	AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
521	}
522	return r32;
523	}
524
525
526	static RTPBCD80U RandD80Src(uint32_t iTest)
527	{
528	if (iTest < 3)
529	{
530	RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
531	return d80Zero;
532	}
533	if (iTest < 5)
534	{
535	RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
536	return d80Ind;
537	}
538
539	RTPBCD80U d80;
540	uint8_t b = RandU8();
541	d80.s.fSign = b & 1;
542
543	if ((iTest & 7) >= 6)
544	{
545	/* Illegal */
546	d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
547	for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
548	d80.s.abPairs[iPair] = RandU8();
549	}
550	else
551	{
552	/* Normal */
553	d80.s.uPad = 0;
554	for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
555	{
556	uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
557	uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
558	d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
559	}
560	}
561	return d80;
562	}
563
564
565	const char *GenFormatR80(PCRTFLOAT80U plrd)
566	{
567	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
568	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
569	plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
570	return pszBuf;
571	}
572
573	const char *GenFormatR64(PCRTFLOAT64U prd)
574	{
575	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
576	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
577	prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
578	return pszBuf;
579	}
580
581
582	const char *GenFormatR32(PCRTFLOAT32U pr)
583	{
584	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
585	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
586	return pszBuf;
587	}
588
589
590	const char *GenFormatD80(PCRTPBCD80U pd80)
591	{
592	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
593	size_t off;
594	if (pd80->s.uPad == 0)
595	off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
596	else
597	off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
598	size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
599	while (iPair-- > 0)
600	off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
601	RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
602	RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
603	pszBuf[off++] = ')';
604	pszBuf[off++] = '\0';
605	return pszBuf;
606	}
607
608
609	const char *GenFormatI64(int64_t i64)
610	{
611	if (i64 == INT64_MIN) /* This one is problematic */
612	return "INT64_MIN";
613	if (i64 == INT64_MAX)
614	return "INT64_MAX";
615	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
616	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
617	return pszBuf;
618	}
619
620
621	const char GenFormatI64(int64_t const pi64)
622	{
623	return GenFormatI64(*pi64);
624	}
625
626
627	const char *GenFormatI32(int32_t i32)
628	{
629	if (i32 == INT32_MIN) /* This one is problematic */
630	return "INT32_MIN";
631	if (i32 == INT32_MAX)
632	return "INT32_MAX";
633	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
634	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
635	return pszBuf;
636	}
637
638
639	const char GenFormatI32(int32_t const pi32)
640	{
641	return GenFormatI32(*pi32);
642	}
643
644
645	const char *GenFormatI16(int16_t i16)
646	{
647	if (i16 == INT16_MIN) /* This one is problematic */
648	return "INT16_MIN";
649	if (i16 == INT16_MAX)
650	return "INT16_MAX";
651	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
652	RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
653	return pszBuf;
654	}
655
656
657	const char GenFormatI16(int16_t const pi16)
658	{
659	return GenFormatI16(*pi16);
660	}
661
662
663	static void GenerateHeader(PRTSTREAM pOut, const char pszCpuDesc, const char pszCpuType)
664	{
665	/* We want to tag the generated source code with the revision that produced it. */
666	static char s_szRev[] = "$Revision: 94423 $";
667	const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
668	size_t cchRev = 0;
669	while (RT_C_IS_DIGIT(pszRev[cchRev]))
670	cchRev++;
671
672	RTStrmPrintf(pOut,
673	"/* $Id: tstIEMAImpl.cpp 94423 2022-03-31 22:59:46Z vboxsync $ */\n"
674	"/** @file\n"
675	" * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
676	" */\n"
677	"\n"
678	"/*\n"
679	" * Copyright (C) 2022 Oracle Corporation\n"
680	" *\n"
681	" * This file is part of VirtualBox Open Source Edition (OSE), as\n"
682	" * available from http://www.virtualbox.org. This file is free software;\n"
683	" * you can redistribute it and/or modify it under the terms of the GNU\n"
684	" * General Public License (GPL) as published by the Free Software\n"
685	" * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
686	" * VirtualBox OSE distribution. VirtualBox OSE is distributed in the\n"
687	" * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
688	" */\n"
689	"\n"
690	"#include \"tstIEMAImpl.h\"\n"
691	"\n"
692	,
693	pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
694	}
695
696
697	static PRTSTREAM GenerateOpenWithHdr(const char pszFilename, const char pszCpuDesc, const char *pszCpuType)
698	{
699	PRTSTREAM pOut = NULL;
700	int rc = RTStrmOpen(pszFilename, "w", &pOut);
701	if (RT_SUCCESS(rc))
702	{
703	GenerateHeader(pOut, pszCpuDesc, pszCpuType);
704	return pOut;
705	}
706	RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
707	return NULL;
708	}
709
710
711	static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
712	{
713	RTStrmPrintf(pOut,
714	"\n"
715	"/* end of file */\n");
716	int rc = RTStrmClose(pOut);
717	if (RT_SUCCESS(rc))
718	return rcExit;
719	return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
720	}
721
722
723	static void GenerateArrayStart(PRTSTREAM pOut, const char pszName, const char pszType)
724	{
725	RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
726	}
727
728
729	static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
730	{
731	RTStrmPrintf(pOut,
732	"};\n"
733	"uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
734	"\n",
735	pszName, pszName);
736	}
737
738	#endif /* TSTIEMAIMPL_WITH_GENERATOR */
739
740
/*
 * Test helpers.
 */
744	static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
745	{
746	if (fActual == fExpected)
747	return "";
748
749	uint32_t const fXor = fActual ^ fExpected;
750	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
751	size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
752
753	static struct
754	{
755	const char *pszName;
756	uint32_t fFlag;
757	} const s_aFlags[] =
758	{
759	#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
760	EFL_ENTRY(CF),
761	EFL_ENTRY(PF),
762	EFL_ENTRY(AF),
763	EFL_ENTRY(ZF),
764	EFL_ENTRY(SF),
765	EFL_ENTRY(TF),
766	EFL_ENTRY(IF),
767	EFL_ENTRY(DF),
768	EFL_ENTRY(OF),
769	EFL_ENTRY(IOPL),
770	EFL_ENTRY(NT),
771	EFL_ENTRY(RF),
772	EFL_ENTRY(VM),
773	EFL_ENTRY(AC),
774	EFL_ENTRY(VIF),
775	EFL_ENTRY(VIP),
776	EFL_ENTRY(ID),
777	};
778	for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
779	if (s_aFlags[i].fFlag & fXor)
780	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
781	s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
782	RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
783	return pszBuf;
784	}
785
786
787	static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
788	{
789	if (fActual == fExpected)
790	return "";
791
792	uint16_t const fXor = fActual ^ fExpected;
793	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
794	size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
795
796	static struct
797	{
798	const char *pszName;
799	uint32_t fFlag;
800	} const s_aFlags[] =
801	{
802	#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
803	FSW_ENTRY(IE),
804	FSW_ENTRY(DE),
805	FSW_ENTRY(ZE),
806	FSW_ENTRY(OE),
807	FSW_ENTRY(UE),
808	FSW_ENTRY(PE),
809	FSW_ENTRY(SF),
810	FSW_ENTRY(ES),
811	FSW_ENTRY(C0),
812	FSW_ENTRY(C1),
813	FSW_ENTRY(C2),
814	FSW_ENTRY(C3),
815	FSW_ENTRY(B),
816	};
817	for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
818	if (s_aFlags[i].fFlag & fXor)
819	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
820	s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
821	if (fXor & X86_FSW_TOP_MASK)
822	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
823	X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
824	RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
825	return pszBuf;
826	}
827
828
829	static const char *FormatFcw(uint16_t fFcw)
830	{
831	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
832
833	const char pszPC = NULL; / (msc+gcc are too stupid) */
834	switch (fFcw & X86_FCW_PC_MASK)
835	{
836	case X86_FCW_PC_24: pszPC = "PC24"; break;
837	case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
838	case X86_FCW_PC_53: pszPC = "PC53"; break;
839	case X86_FCW_PC_64: pszPC = "PC64"; break;
840	}
841
842	const char pszRC = NULL; / (msc+gcc are too stupid) */
843	switch (fFcw & X86_FCW_RC_MASK)
844	{
845	case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
846	case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
847	case X86_FCW_RC_UP: pszRC = "UP"; break;
848	case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
849	}
850	size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);
851
852	static struct
853	{
854	const char *pszName;
855	uint32_t fFlag;
856	} const s_aFlags[] =
857	{
858	#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
859	FCW_ENTRY(IM),
860	FCW_ENTRY(DM),
861	FCW_ENTRY(ZM),
862	FCW_ENTRY(OM),
863	FCW_ENTRY(UM),
864	FCW_ENTRY(PM),
865	{ "6M", 64 },
866	};
867	for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
868	if (fFcw & s_aFlags[i].fFlag)
869	cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
870
871	RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
872	return pszBuf;
873	}
874
875
876	static const char *FormatR80(PCRTFLOAT80U pr80)
877	{
878	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
879	RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
880	return pszBuf;
881	}
882
883
884	static const char *FormatR64(PCRTFLOAT64U pr64)
885	{
886	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
887	RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
888	return pszBuf;
889	}
890
891
892	static const char *FormatR32(PCRTFLOAT32U pr32)
893	{
894	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
895	RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
896	return pszBuf;
897	}
898
899
900	static const char *FormatD80(PCRTPBCD80U pd80)
901	{
902	/* There is only one indefinite endcoding (same as for 80-bit
903	floating point), so get it out of the way first: */
904	if (RTPBCD80U_IS_INDEFINITE(pd80))
905	return "Ind";
906
907	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
908	size_t off = 0;
909	pszBuf[off++] = pd80->s.fSign ? '-' : '+';
910	unsigned cBadDigits = 0;
911	size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
912	while (iPair-- > 0)
913	{
914	static const char s_szDigits[] = "0123456789abcdef";
915	static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
916	pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
917	pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
918	cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
919	+ s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
920	}
921	if (cBadDigits \|\| pd80->s.uPad != 0)
922	off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
923	pszBuf[off] = '\0';
924	return pszBuf;
925	}
926
927
#if 0
/**
 * Formats a signed 64-bit value in hex (currently compiled out).
 *
 * @returns Pointer to one of the rotating g_aszBuf string buffers.
 * @param   piVal   Pointer to the value to format.
 */
static const char *FormatI64(int64_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
#endif
936
937
938	static const char FormatI32(int32_t const piVal)
939	{
940	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
941	RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL \| RTSTR_F_VALSIGNED);
942	return pszBuf;
943	}
944
945
946	static const char FormatI16(int16_t const piVal)
947	{
948	char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
949	RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL \| RTSTR_F_VALSIGNED);
950	return pszBuf;
951	}
952
953
/*
 * Binary operations.
 */
957	TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
958	TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
959	TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
960	TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
961
/**
 * Defines BinU<a_cBits>Generate(), which writes test data arrays for every
 * entry in g_aBinU<a_cBits>.
 *
 * Entries with the NATIVE flavour are written to pOut.  Entries with another
 * flavour are written to pOutCpu, but only if the flavour equals
 * g_idxCpuEflFlavour; the rest are skipped.  The native function is
 * preferred over pfn for producing the expected output.  When the entry has
 * a non-zero uExtra, the source operand is masked with a_cBits - 1.
 *
 * Expands to nothing without TSTIEMAIMPL_WITH_GENERATOR.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
static void BinU ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aBinU ## a_cBits); idxFn++) \
    { \
        /* Pick the output stream, skipping entries for other CPU flavours. */ \
        PRTSTREAM pStrm = pOut; \
        if (g_aBinU ## a_cBits[idxFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (g_aBinU ## a_cBits[idxFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pStrm = pOutCpu; \
        } \
        \
        PFNIEMAIMPLBINU ## a_cBits const pfnWorker = g_aBinU ## a_cBits[idxFn].pfnNative \
                                                    ? g_aBinU ## a_cBits[idxFn].pfnNative \
                                                    : g_aBinU ## a_cBits[idxFn].pfn; \
        \
        GenerateArrayStart(pStrm, g_aBinU ## a_cBits[idxFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType TestData; \
            TestData.fEflIn    = RandEFlags(); \
            TestData.fEflOut   = TestData.fEflIn; \
            TestData.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            TestData.uDstOut   = TestData.uDstIn; \
            TestData.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            if (g_aBinU ## a_cBits[idxFn].uExtra) \
                TestData.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
            TestData.uMisc     = 0; \
            pfnWorker(&TestData.uDstOut, TestData.uSrcIn, &TestData.fEflOut); \
            RTStrmPrintf(pStrm, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* #%u */\n", \
                         TestData.fEflIn, TestData.fEflOut, TestData.uDstIn, TestData.uDstOut, \
                         TestData.uSrcIn, TestData.uMisc, iTest); \
        } \
        GenerateArrayEnd(pStrm, g_aBinU ## a_cBits[idxFn].pszName); \
    } \
}
#else
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
#endif
1000
/**
 * Defines the generator (via GEN_BINARY_TESTS) and BinU<a_cBits>Test() for
 * the binary operation sub-tests in @a a_aSubTests.
 *
 * The test function runs every test record through pfn using local
 * variables and reports a failure if the destination or EFLAGS output
 * differs from the record.  If they match, the call is repeated via the
 * g_pu<a_cBits> and g_pfEfl buffers and checked again.  When
 * COUNT_VARIATIONS yields 2, all records are run a second time with
 * pfnNative (marked "/n" in failure messages).
 *
 * @param   a_cBits     Operand width in bits, used for name pasting.
 * @param   a_uType     The operand type.
 * @param   a_Fmt       Format string fragment for an operand value.
 * @param   a_TestType  The test data record type.
 * @param   a_aSubTests The sub-test table.
 */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        RTTestSub(g_hTest, a_aSubTests[iFn].pszName); \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const           cTests  = *a_aSubTests[iFn].pcTests; \
        PFNIEMAIMPLBINU ## a_cBits pfn   = a_aSubTests[iFn].pfn; \
        uint32_t const           cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType  uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl         = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
1041
1042
1043	/*
1044	* 8-bit binary operations.
1045	*/
1046	static const BINU8_T g_aBinU8[] =
1047	{
1048	ENTRY(add_u8),
1049	ENTRY(add_u8_locked),
1050	ENTRY(adc_u8),
1051	ENTRY(adc_u8_locked),
1052	ENTRY(sub_u8),
1053	ENTRY(sub_u8_locked),
1054	ENTRY(sbb_u8),
1055	ENTRY(sbb_u8_locked),
1056	ENTRY(or_u8),
1057	ENTRY(or_u8_locked),
1058	ENTRY(xor_u8),
1059	ENTRY(xor_u8_locked),
1060	ENTRY(and_u8),
1061	ENTRY(and_u8_locked),
1062	ENTRY(cmp_u8),
1063	ENTRY(test_u8),
1064	};
1065	TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1066
1067
1068	/*
1069	* 16-bit binary operations.
1070	*/
1071	static const BINU16_T g_aBinU16[] =
1072	{
1073	ENTRY(add_u16),
1074	ENTRY(add_u16_locked),
1075	ENTRY(adc_u16),
1076	ENTRY(adc_u16_locked),
1077	ENTRY(sub_u16),
1078	ENTRY(sub_u16_locked),
1079	ENTRY(sbb_u16),
1080	ENTRY(sbb_u16_locked),
1081	ENTRY(or_u16),
1082	ENTRY(or_u16_locked),
1083	ENTRY(xor_u16),
1084	ENTRY(xor_u16_locked),
1085	ENTRY(and_u16),
1086	ENTRY(and_u16_locked),
1087	ENTRY(cmp_u16),
1088	ENTRY(test_u16),
1089	ENTRY_EX(bt_u16, 1),
1090	ENTRY_EX(btc_u16, 1),
1091	ENTRY_EX(btc_u16_locked, 1),
1092	ENTRY_EX(btr_u16, 1),
1093	ENTRY_EX(btr_u16_locked, 1),
1094	ENTRY_EX(bts_u16, 1),
1095	ENTRY_EX(bts_u16_locked, 1),
1096	ENTRY_AMD( bsf_u16, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1097	ENTRY_INTEL(bsf_u16, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1098	ENTRY_AMD( bsr_u16, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1099	ENTRY_INTEL(bsr_u16, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1100	ENTRY_AMD( imul_two_u16, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
1101	ENTRY_INTEL(imul_two_u16, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
1102	ENTRY(arpl),
1103	};
1104	TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1105
1106
1107	/*
1108	* 32-bit binary operations.
1109	*/
1110	static const BINU32_T g_aBinU32[] =
1111	{
1112	ENTRY(add_u32),
1113	ENTRY(add_u32_locked),
1114	ENTRY(adc_u32),
1115	ENTRY(adc_u32_locked),
1116	ENTRY(sub_u32),
1117	ENTRY(sub_u32_locked),
1118	ENTRY(sbb_u32),
1119	ENTRY(sbb_u32_locked),
1120	ENTRY(or_u32),
1121	ENTRY(or_u32_locked),
1122	ENTRY(xor_u32),
1123	ENTRY(xor_u32_locked),
1124	ENTRY(and_u32),
1125	ENTRY(and_u32_locked),
1126	ENTRY(cmp_u32),
1127	ENTRY(test_u32),
1128	ENTRY_EX(bt_u32, 1),
1129	ENTRY_EX(btc_u32, 1),
1130	ENTRY_EX(btc_u32_locked, 1),
1131	ENTRY_EX(btr_u32, 1),
1132	ENTRY_EX(btr_u32_locked, 1),
1133	ENTRY_EX(bts_u32, 1),
1134	ENTRY_EX(bts_u32_locked, 1),
1135	ENTRY_AMD( bsf_u32, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1136	ENTRY_INTEL(bsf_u32, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1137	ENTRY_AMD( bsr_u32, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1138	ENTRY_INTEL(bsr_u32, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1139	ENTRY_AMD( imul_two_u32, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
1140	ENTRY_INTEL(imul_two_u32, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
1141	};
1142	TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1143
1144
1145	/*
1146	* 64-bit binary operations.
1147	*/
1148	static const BINU64_T g_aBinU64[] =
1149	{
1150	ENTRY(add_u64),
1151	ENTRY(add_u64_locked),
1152	ENTRY(adc_u64),
1153	ENTRY(adc_u64_locked),
1154	ENTRY(sub_u64),
1155	ENTRY(sub_u64_locked),
1156	ENTRY(sbb_u64),
1157	ENTRY(sbb_u64_locked),
1158	ENTRY(or_u64),
1159	ENTRY(or_u64_locked),
1160	ENTRY(xor_u64),
1161	ENTRY(xor_u64_locked),
1162	ENTRY(and_u64),
1163	ENTRY(and_u64_locked),
1164	ENTRY(cmp_u64),
1165	ENTRY(test_u64),
1166	ENTRY_EX(bt_u64, 1),
1167	ENTRY_EX(btc_u64, 1),
1168	ENTRY_EX(btc_u64_locked, 1),
1169	ENTRY_EX(btr_u64, 1),
1170	ENTRY_EX(btr_u64_locked, 1),
1171	ENTRY_EX(bts_u64, 1),
1172	ENTRY_EX(bts_u64_locked, 1),
1173	ENTRY_AMD( bsf_u64, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1174	ENTRY_INTEL(bsf_u64, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1175	ENTRY_AMD( bsr_u64, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1176	ENTRY_INTEL(bsr_u64, X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF),
1177	ENTRY_AMD( imul_two_u64, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
1178	ENTRY_INTEL(imul_two_u64, X86_EFL_PF \| X86_EFL_AF \| X86_EFL_ZF \| X86_EFL_SF),
1179	};
1180	TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1181
1182
1183	/*
1184	* XCHG
1185	*/
1186	static void XchgTest(void)
1187	{
1188	RTTestSub(g_hTest, "xchg");
1189	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t pu8Mem, uint8_t pu8Reg));
1190	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t pu16Mem, uint16_t pu16Reg));
1191	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t pu32Mem, uint32_t pu32Reg));
1192	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t pu64Mem, uint64_t pu64Reg));
1193
1194	static struct
1195	{
1196	uint8_t cb; uint64_t fMask;
1197	union
1198	{
1199	uintptr_t pfn;
1200	FNIEMAIMPLXCHGU8 *pfnU8;
1201	FNIEMAIMPLXCHGU16 *pfnU16;
1202	FNIEMAIMPLXCHGU32 *pfnU32;
1203	FNIEMAIMPLXCHGU64 *pfnU64;
1204	} u;
1205	}
1206	s_aXchgWorkers[] =
1207	{
1208	{ 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
1209	{ 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
1210	{ 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
1211	{ 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
1212	{ 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
1213	{ 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
1214	{ 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
1215	{ 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
1216	};
1217	for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
1218	{
1219	RTUINT64U uIn1, uIn2, uMem, uDst;
1220	uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1221	uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1222	if (uIn1.u == uIn2.u)
1223	uDst.u = uIn2.u = ~uIn2.u;
1224
1225	switch (s_aXchgWorkers[i].cb)
1226	{
1227	case 1:
1228	s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
1229	s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
1230	break;
1231	case 2:
1232	s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
1233	s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
1234	break;
1235	case 4:
1236	s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
1237	s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
1238	break;
1239	case 8:
1240	s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
1241	s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
1242	break;
1243	default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
1244	}
1245
1246	if (uMem.u != uIn2.u \|\| uDst.u != uIn1.u)
1247	RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
1248	}
1249	}
1250
1251
1252	/*
1253	* XADD
1254	*/
1255	static void XaddTest(void)
1256	{
1257	#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1258	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type , a_Type , uint32_t *)); \
1259	static struct \
1260	{ \
1261	const char *pszName; \
1262	FNIEMAIMPLXADDU ## a_cBits *pfn; \
1263	BINU ## a_cBits ## _TEST_T const *paTests; \
1264	uint32_t const *pcTests; \
1265	} const s_aFuncs[] = \
1266	{ \
1267	{ "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1268	g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1269	{ "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1270	g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1271	}; \
1272	for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1273	{ \
1274	RTTestSub(g_hTest, s_aFuncs[iFn].pszName); \
1275	uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1276	BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1277	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1278	for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1279	{ \
1280	uint32_t fEfl = paTests[iTest].fEflIn; \
1281	a_Type uSrc = paTests[iTest].uSrcIn; \
1282	*g_pu ## a_cBits = paTests[iTest].uDstIn; \
1283	s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1284	if ( fEfl != paTests[iTest].fEflOut \
1285	\|\| *g_pu ## a_cBits != paTests[iTest].uDstOut \
1286	\|\| uSrc != paTests[iTest].uDstIn) \
1287	RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1288	s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1289	fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
1290	EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1291	} \
1292	} \
1293	} while(0)
1294	TEST_XADD(8, uint8_t, "%#04x");
1295	TEST_XADD(16, uint16_t, "%#06x");
1296	TEST_XADD(32, uint32_t, "%#010RX32");
1297	TEST_XADD(64, uint64_t, "%#010RX64");
1298	}
1299
1300
1301	/*
1302	* CMPXCHG
1303	*/
1304
1305	static void CmpXchgTest(void)
1306	{
1307	#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
1308	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type , a_Type , a_Type, uint32_t *)); \
1309	static struct \
1310	{ \
1311	const char *pszName; \
1312	FNIEMAIMPLCMPXCHGU ## a_cBits *pfn; \
1313	PFNIEMAIMPLBINU ## a_cBits pfnSub; \
1314	BINU ## a_cBits ## _TEST_T const *paTests; \
1315	uint32_t const *pcTests; \
1316	} const s_aFuncs[] = \
1317	{ \
1318	{ "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
1319	g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1320	{ "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
1321	g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1322	}; \
1323	for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1324	{ \
1325	RTTestSub(g_hTest, s_aFuncs[iFn].pszName); \
1326	BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1327	uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1328	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1329	for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1330	{ \
1331	/* as is (99% likely to be negative). */ \
1332	uint32_t fEfl = paTests[iTest].fEflIn; \
1333	a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
1334	a_Type uA = paTests[iTest].uDstIn; \
1335	*g_pu ## a_cBits = paTests[iTest].uSrcIn; \
1336	a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
1337	s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1338	if ( fEfl != paTests[iTest].fEflOut \
1339	\|\| *g_pu ## a_cBits != uExpect \
1340	\|\| uA != paTests[iTest].uSrcIn) \
1341	RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1342	s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
1343	uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
1344	EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1345	/* positive */ \
1346	uint32_t fEflExpect = paTests[iTest].fEflIn; \
1347	uA = paTests[iTest].uDstIn; \
1348	s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
1349	fEfl = paTests[iTest].fEflIn; \
1350	uA = paTests[iTest].uDstIn; \
1351	*g_pu ## a_cBits = uA; \
1352	s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1353	if ( fEfl != fEflExpect \
1354	\|\| *g_pu ## a_cBits != uNew \
1355	\|\| uA != paTests[iTest].uDstIn) \
1356	RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1357	s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
1358	uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
1359	EFlagsDiff(fEfl, fEflExpect)); \
1360	} \
1361	} \
1362	} while(0)
1363	TEST_CMPXCHG(8, uint8_t, "%#04RX8");
1364	TEST_CMPXCHG(16, uint16_t, "%#06x");
1365	TEST_CMPXCHG(32, uint32_t, "%#010RX32");
1366	#if ARCH_BITS != 32 /* calling convension issue, skipping as it's an unsupported host */
1367	TEST_CMPXCHG(64, uint64_t, "%#010RX64");
1368	#endif
1369	}
1370
1371	static void CmpXchg8bTest(void)
1372	{
1373	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t , PRTUINT64U, PRTUINT64U, uint32_t ));
1374	static struct
1375	{
1376	const char *pszName;
1377	FNIEMAIMPLCMPXCHG8B *pfn;
1378	} const s_aFuncs[] =
1379	{
1380	{ "cmpxchg8b", iemAImpl_cmpxchg8b },
1381	{ "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
1382	};
1383	for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
1384	{
1385	RTTestSub(g_hTest, s_aFuncs[iFn].pszName);
1386	for (uint32_t iTest = 0; iTest < 4; iTest += 2)
1387	{
1388	uint64_t const uOldValue = RandU64();
1389	uint64_t const uNewValue = RandU64();
1390
1391	/* positive test. */
1392	RTUINT64U uA, uB;
1393	uB.u = uNewValue;
1394	uA.u = uOldValue;
1395	*g_pu64 = uOldValue;
1396	uint32_t fEflIn = RandEFlags();
1397	uint32_t fEfl = fEflIn;
1398	s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
1399	if ( fEfl != (fEflIn \| X86_EFL_ZF)
1400	\|\| *g_pu64 != uNewValue
1401	\|\| uA.u != uOldValue)
1402	RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
1403	iTest, fEflIn, uOldValue, uOldValue, uNewValue,
1404	fEfl, *g_pu64, uA.u,
1405	(fEflIn \| X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn \| X86_EFL_ZF));
1406	RTTEST_CHECK(g_hTest, uB.u == uNewValue);
1407
1408	/* negative */
1409	uint64_t const uExpect = ~uOldValue;
1410	*g_pu64 = uExpect;
1411	uA.u = uOldValue;
1412	uB.u = uNewValue;
1413	fEfl = fEflIn = RandEFlags();
1414	s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
1415	if ( fEfl != (fEflIn & ~X86_EFL_ZF)
1416	\|\| *g_pu64 != uExpect
1417	\|\| uA.u != uExpect)
1418	RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
1419	iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
1420	fEfl, *g_pu64, uA.u,
1421	(fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
1422	RTTEST_CHECK(g_hTest, uB.u == uNewValue);
1423	}
1424	}
1425	}
1426
1427	static void CmpXchg16bTest(void)
1428	{
1429	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
1430	static struct
1431	{
1432	const char *pszName;
1433	FNIEMAIMPLCMPXCHG16B *pfn;
1434	} const s_aFuncs[] =
1435	{
1436	{ "cmpxchg16b", iemAImpl_cmpxchg16b },
1437	{ "cmpxchg16b_locked", iemAImpl_cmpxchg16b_locked },
1438	#if !defined(RT_ARCH_ARM64)
1439	{ "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
1440	#endif
1441	};
1442	for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
1443	{
1444	#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
1445	if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
1446	continue;
1447	#endif
1448	RTTestSub(g_hTest, s_aFuncs[iFn].pszName);
1449	for (uint32_t iTest = 0; iTest < 4; iTest += 2)
1450	{
1451	RTUINT128U const uOldValue = RandU128();
1452	RTUINT128U const uNewValue = RandU128();
1453
1454	/* positive test. */
1455	RTUINT128U uA, uB;
1456	uB = uNewValue;
1457	uA = uOldValue;
1458	*g_pu128 = uOldValue;
1459	uint32_t fEflIn = RandEFlags();
1460	uint32_t fEfl = fEflIn;
1461	s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
1462	if ( fEfl != (fEflIn \| X86_EFL_ZF)
1463	\|\| g_pu128->s.Lo != uNewValue.s.Lo
1464	\|\| g_pu128->s.Hi != uNewValue.s.Hi
1465	\|\| uA.s.Lo != uOldValue.s.Lo
1466	\|\| uA.s.Hi != uOldValue.s.Hi)
1467	RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
1468	" -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
1469	" wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
1470	iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
1471	fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
1472	(fEflIn \| X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
1473	EFlagsDiff(fEfl, fEflIn \| X86_EFL_ZF));
1474	RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
1475
1476	/* negative */
1477	RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
1478	*g_pu128 = uExpect;
1479	uA = uOldValue;
1480	uB = uNewValue;
1481	fEfl = fEflIn = RandEFlags();
1482	s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
1483	if ( fEfl != (fEflIn & ~X86_EFL_ZF)
1484	\|\| g_pu128->s.Lo != uExpect.s.Lo
1485	\|\| g_pu128->s.Hi != uExpect.s.Hi
1486	\|\| uA.s.Lo != uExpect.s.Lo
1487	\|\| uA.s.Hi != uExpect.s.Hi)
1488	RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
1489	" -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
1490	" wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
1491	iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
1492	fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
1493	(fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
1494	EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
1495	RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
1496	}
1497	}
1498	}
1499
1500
1501	/*
1502	* Double shifts.
1503	*
1504	* Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
1505	*/
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines ShiftDblU<a_cBits>Generate(), the test data generator for a double
 * shift sub-test table.
 *
 * Entries whose EFLAGS flavour is neither native nor g_idxCpuEflFlavour are
 * skipped.  For the others cTests records are produced by running pfnNative
 * on values from the RandXxx helpers; the shift count is stored in uMisc.
 *
 * The generated function is static, like the binary operand generator, since
 * it is only called from within this file.
 *
 * @param   a_cBits     Operand width in bits.
 * @param   a_Fmt       Format string fragment used for operand values.
 * @param   a_TestType  The test record type (BINUxx_TEST_T).
 * @param   a_aSubTests The sub-test table to generate data for.
 */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static void ShiftDblU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            Test.uMisc     = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
            /* uMisc is cast so the vararg matches %2u, as the double shift test does for its %u. */ \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, (unsigned)Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1535
1536	#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
1537	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
1538	\
1539	static a_SubTestType const a_aSubTests[] = \
1540	{ \
1541	ENTRY_AMD(shld_u ## a_cBits, X86_EFL_OF \| X86_EFL_CF), \
1542	ENTRY_INTEL(shld_u ## a_cBits, X86_EFL_OF \| X86_EFL_CF), \
1543	ENTRY_AMD(shrd_u ## a_cBits, X86_EFL_OF \| X86_EFL_CF), \
1544	ENTRY_INTEL(shrd_u ## a_cBits, X86_EFL_OF \| X86_EFL_CF), \
1545	}; \
1546	\
1547	GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
1548	\
1549	static void ShiftDblU ## a_cBits ## Test(void) \
1550	{ \
1551	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
1552	{ \
1553	RTTestSub(g_hTest, a_aSubTests[iFn].pszName); \
1554	a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
1555	PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
1556	uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
1557	uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
1558	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1559	for (uint32_t iVar = 0; iVar < cVars; iVar++) \
1560	{ \
1561	for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1562	{ \
1563	uint32_t fEfl = paTests[iTest].fEflIn; \
1564	a_Type uDst = paTests[iTest].uDstIn; \
1565	pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
1566	if ( uDst != paTests[iTest].uDstOut \
1567	\|\| fEfl != paTests[iTest].fEflOut) \
1568	RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
1569	iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
1570	paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
1571	fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
1572	EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
1573	else \
1574	{ \
1575	*g_pu ## a_cBits = paTests[iTest].uDstIn; \
1576	*g_pfEfl = paTests[iTest].fEflIn; \
1577	pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
1578	RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
1579	RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
1580	} \
1581	} \
1582	pfn = a_aSubTests[iFn].pfnNative; \
1583	} \
1584	} \
1585	}
1586	TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
1587	TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
1588	TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
1589
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the 16, 32 and 64-bit double shift generators, in that order.
 *
 * @param   pOut    Stream handed to each generator.
 * @param   cTests  Test count handed to each generator.
 */
static void ShiftDblGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    typedef void FNSHIFTDBLGEN(PRTSTREAM, uint32_t);
    static FNSHIFTDBLGEN * const s_apfnGenerators[] =
    {
        ShiftDblU16Generate,
        ShiftDblU32Generate,
        ShiftDblU64Generate,
    };
    for (size_t idx = 0; idx < RT_ELEMENTS(s_apfnGenerators); idx++)
        s_apfnGenerators[idx](pOut, cTests);
}
#endif
1598
1599	static void ShiftDblTest(void)
1600	{
1601	ShiftDblU16Test();
1602	ShiftDblU32Test();
1603	ShiftDblU64Test();
1604	}
1605
1606
/*
 * Unary operators.
 *
 * Note! We use BINUxx_TEST_T, ignoring uSrcIn and uMisc.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines UnaryU<a_cBits>Generate(), the test data generator for the
 * g_aUnaryU<a_cBits> sub-test table.
 *
 * For every table entry cTests records are produced by running the pfn
 * worker on values from the RandXxx helpers; uSrcIn and uMisc are always
 * zero.
 *
 * The generated function is static, like the binary operand generator, since
 * it is only called from within this file.
 *
 * @param   a_cBits         Operand width in bits.
 * @param   a_Type          Operand integer type (unused here).
 * @param   a_Fmt           Format string fragment used for operand values.
 * @param   a_TestType      The test record type (BINUxx_TEST_T).
 * @param   a_SubTestType   The sub-test type (unused here).
 */
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
static void UnaryU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
    { \
        GenerateArrayStart(pOut, g_aUnaryU ## a_cBits[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits(); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = 0; \
            Test.uMisc     = 0; \
            g_aUnaryU ## a_cBits[iFn].pfn(&Test.uDstOut, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, 0 }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, iTest); \
        } \
        GenerateArrayEnd(pOut, g_aUnaryU ## a_cBits[iFn].pszName); \
    } \
}
#else
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
#endif
1638
1639	#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1640	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
1641	static a_SubTestType const g_aUnaryU ## a_cBits [] = \
1642	{ \
1643	ENTRY(inc_u ## a_cBits), \
1644	ENTRY(inc_u ## a_cBits ## _locked), \
1645	ENTRY(dec_u ## a_cBits), \
1646	ENTRY(dec_u ## a_cBits ## _locked), \
1647	ENTRY(not_u ## a_cBits), \
1648	ENTRY(not_u ## a_cBits ## _locked), \
1649	ENTRY(neg_u ## a_cBits), \
1650	ENTRY(neg_u ## a_cBits ## _locked), \
1651	}; \
1652	\
1653	GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1654	\
1655	static void UnaryU ## a_cBits ## Test(void) \
1656	{ \
1657	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
1658	{ \
1659	RTTestSub(g_hTest, g_aUnaryU ## a_cBits[iFn].pszName); \
1660	a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
1661	uint32_t const cTests = *g_aUnaryU ## a_cBits[iFn].pcTests; \
1662	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1663	for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1664	{ \
1665	uint32_t fEfl = paTests[iTest].fEflIn; \
1666	a_Type uDst = paTests[iTest].uDstIn; \
1667	g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
1668	if ( uDst != paTests[iTest].uDstOut \
1669	\|\| fEfl != paTests[iTest].fEflOut) \
1670	RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
1671	iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
1672	fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
1673	EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1674	else \
1675	{ \
1676	*g_pu ## a_cBits = paTests[iTest].uDstIn; \
1677	*g_pfEfl = paTests[iTest].fEflIn; \
1678	g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
1679	RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
1680	RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
1681	} \
1682	} \
1683	} \
1684	}
1685	TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
1686	TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
1687	TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
1688	TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
1689
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the 8, 16, 32 and 64-bit unary operator generators, in that order.
 *
 * @param   pOut    Stream handed to each generator.
 * @param   cTests  Test count handed to each generator.
 */
static void UnaryGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    typedef void FNUNARYGEN(PRTSTREAM, uint32_t);
    static FNUNARYGEN * const s_apfnGenerators[] =
    {
        UnaryU8Generate,
        UnaryU16Generate,
        UnaryU32Generate,
        UnaryU64Generate,
    };
    for (size_t idx = 0; idx < RT_ELEMENTS(s_apfnGenerators); idx++)
        s_apfnGenerators[idx](pOut, cTests);
}
#endif
1699
1700	static void UnaryTest(void)
1701	{
1702	UnaryU8Test();
1703	UnaryU16Test();
1704	UnaryU32Test();
1705	UnaryU64Test();
1706	}
1707
1708
1709	/*
1710	* Shifts.
1711	*
1712	* Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
1713	*/
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines ShiftU<a_cBits>Generate(), the test data generator for a shift and
 * rotate sub-test table.
 *
 * Entries whose EFLAGS flavour is neither native nor g_idxCpuEflFlavour are
 * skipped.  Each iteration emits two records using the same destination and
 * shift count (stored in uMisc): one with EFLAGS from RandEFlags() and a
 * second, marked 'b', with the live EFLAGS bits inverted.
 *
 * The generated function is static, like the binary operand generator, since
 * it is only called from within this file.
 *
 * @param   a_cBits     Operand width in bits.
 * @param   a_Fmt       Format string fragment used for operand values.
 * @param   a_TestType  The test record type (BINUxx_TEST_T).
 * @param   a_aSubTests The sub-test table to generate data for.
 */
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static void ShiftU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = 0; \
            Test.uMisc     = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            /* uMisc is cast so the vararg matches %-2u, as the double shift test does for its %u. */ \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, (unsigned)Test.uMisc, iTest); \
            \
            /* Second record: same operands, live EFLAGS bits inverted. */ \
            Test.fEflIn    = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstOut   = Test.uDstIn; \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u b */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, (unsigned)Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1750
1751	#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
1752	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
1753	static a_SubTestType const a_aSubTests[] = \
1754	{ \
1755	ENTRY_AMD( rol_u ## a_cBits, X86_EFL_OF), \
1756	ENTRY_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
1757	ENTRY_AMD( ror_u ## a_cBits, X86_EFL_OF), \
1758	ENTRY_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
1759	ENTRY_AMD( rcl_u ## a_cBits, X86_EFL_OF), \
1760	ENTRY_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
1761	ENTRY_AMD( rcr_u ## a_cBits, X86_EFL_OF), \
1762	ENTRY_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
1763	ENTRY_AMD( shl_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
1764	ENTRY_INTEL(shl_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
1765	ENTRY_AMD( shr_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
1766	ENTRY_INTEL(shr_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
1767	ENTRY_AMD( sar_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
1768	ENTRY_INTEL(sar_u ## a_cBits, X86_EFL_OF \| X86_EFL_AF), \
1769	}; \
1770	\
1771	GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
1772	\
1773	static void ShiftU ## a_cBits ## Test(void) \
1774	{ \
1775	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
1776	{ \
1777	RTTestSub(g_hTest, a_aSubTests[iFn].pszName); \
1778	PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
1779	a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
1780	uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
1781	uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
1782	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1783	for (uint32_t iVar = 0; iVar < cVars; iVar++) \
1784	{ \
1785	for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1786	{ \
1787	uint32_t fEfl = paTests[iTest].fEflIn; \
1788	a_Type uDst = paTests[iTest].uDstIn; \
1789	pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
1790	if ( uDst != paTests[iTest].uDstOut \
1791	\|\| fEfl != paTests[iTest].fEflOut ) \
1792	RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
1793	iTest, iVar == 0 ? "" : "/n", \
1794	paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
1795	fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
1796	EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1797	else \
1798	{ \
1799	*g_pu ## a_cBits = paTests[iTest].uDstIn; \
1800	*g_pfEfl = paTests[iTest].fEflIn; \
1801	pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
1802	RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
1803	RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
1804	} \
1805	} \
1806	pfn = a_aSubTests[iFn].pfnNative; \
1807	} \
1808	} \
1809	}
1810	TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
1811	TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
1812	TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
1813	TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
1814
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates shift instruction test data for the 8, 16, 32 and 64-bit operand widths.
 *
 * @param   pOut    Stream passed on to each per-width generator.
 * @param   cTests  Test count passed on to each per-width generator.
 */
static void ShiftGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    ShiftU8Generate( pOut, cTests);
    ShiftU16Generate(pOut, cTests);
    ShiftU32Generate(pOut, cTests);
    ShiftU64Generate(pOut, cTests);
}
#endif
1824
1825	static void ShiftTest(void)
1826	{
1827	ShiftU8Test();
1828	ShiftU16Test();
1829	ShiftU32Test();
1830	ShiftU64Test();
1831	}
1832
1833
1834	/*
1835	* Multiplication and division.
1836	*
1837	* Note! The 8-bit functions has a different format, so we need to duplicate things.
1838	* Note! Currently ignoring undefined bits.
1839	*/
1840
1841	/* U8 */
1842	TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
1843	static INT_MULDIV_U8_T const g_aMulDivU8[] =
1844	{
1845	ENTRY_AMD_EX(mul_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF,
1846	X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF),
1847	ENTRY_INTEL_EX(mul_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0),
1848	ENTRY_AMD_EX(imul_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF,
1849	X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF),
1850	ENTRY_INTEL_EX(imul_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0),
1851	ENTRY_AMD_EX(div_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0),
1852	ENTRY_INTEL_EX(div_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0),
1853	ENTRY_AMD_EX(idiv_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0),
1854	ENTRY_INTEL_EX(idiv_u8, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0),
1855	};
1856
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes a MULDIVU8_TEST_T array of generated test data for each applicable
 * entry in g_aMulDivU8.
 *
 * An entry is applicable when its EFLAGS flavour is the native one or equals
 * g_idxCpuEflFlavour.  The expected outputs are produced by running the
 * entry's pfnNative worker on the random inputs.
 *
 * @param   pOut    Stream the generated arrays are written to.
 * @param   cTests  Number of test entries to generate per array.
 */
static void MulDivU8Generate(PRTSTREAM pOut, uint32_t cTests)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
    {
        bool const fApplicable = g_aMulDivU8[iFn].idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
                              || g_aMulDivU8[iFn].idxCpuEflFlavour == g_idxCpuEflFlavour;
        if (fApplicable)
        {
            GenerateArrayStart(pOut, g_aMulDivU8[iFn].pszName, "MULDIVU8_TEST_T");
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                /* Outputs start out as copies of the inputs and are then updated by the worker. */
                MULDIVU8_TEST_T Test;
                Test.fEflIn  = RandEFlags();
                Test.fEflOut = Test.fEflIn;
                Test.uDstIn  = RandU16Dst(iTest);
                Test.uDstOut = Test.uDstIn;
                Test.uSrcIn  = RandU8Src(iTest);
                Test.rc      = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
                RTStrmPrintf(pOut, " { %#08x, %#08x, %#06RX16, %#06RX16, %#04RX8, %d }, /* #%u */\n",
                             Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.rc, iTest);
            }
            GenerateArrayEnd(pOut, g_aMulDivU8[iFn].pszName);
        }
    }
}
#endif
1882
1883	static void MulDivU8Test(void)
1884	{
1885	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
1886	{
1887	RTTestSub(g_hTest, g_aMulDivU8[iFn].pszName);
1888	MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
1889	uint32_t const cTests = *g_aMulDivU8[iFn].pcTests;
1890	uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
1891	PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
1892	uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]); \
1893	if (!cTests) RTTestSkipped(g_hTest, "no tests");
1894	for (uint32_t iVar = 0; iVar < cVars; iVar++)
1895	{
1896	for (uint32_t iTest = 0; iTest < cTests; iTest++ )
1897	{
1898	uint32_t fEfl = paTests[iTest].fEflIn;
1899	uint16_t uDst = paTests[iTest].uDstIn;
1900	int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
1901	if ( uDst != paTests[iTest].uDstOut
1902	\|\| (fEfl \| fEflIgn) != (paTests[iTest].fEflOut \| fEflIgn)
1903	\|\| rc != paTests[iTest].rc)
1904	RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
1905	" %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
1906	"%sexpected %#08x %#06RX16 %d%s\n",
1907	iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
1908	iVar ? " " : "", fEfl, uDst, rc,
1909	iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
1910	EFlagsDiff(fEfl \| fEflIgn, paTests[iTest].fEflOut \| fEflIgn));
1911	else
1912	{
1913	*g_pu16 = paTests[iTest].uDstIn;
1914	*g_pfEfl = paTests[iTest].fEflIn;
1915	rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
1916	RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
1917	RTTEST_CHECK(g_hTest, (*g_pfEfl \| fEflIgn) == (paTests[iTest].fEflOut \| fEflIgn));
1918	RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
1919	}
1920	}
1921	pfn = g_aMulDivU8[iFn].pfnNative;
1922	}
1923	}
1924	}
1925
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines the file-local MulDivU<bits>Generate function, which writes an array
 * of generated test data for each applicable entry in @a a_aSubTests.
 *
 * An entry is applicable when its EFLAGS flavour is the native one or equals
 * g_idxCpuEflFlavour.  Expected outputs come from running the entry's
 * pfnNative worker on the random inputs.  Expands to nothing when the
 * generator is not compiled in.
 *
 * @param   a_cBits         Operand width in bits, used for name pasting.
 * @param   a_Fmt           Format string literal for printing the operand values.
 * @param   a_TestType      Test data entry type; also printed as the array type name.
 * @param   a_aSubTests     The sub-test table to generate data for.
 */
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static void MulDivU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            /* Outputs start out as copies of the inputs and are then updated by the worker. */ \
            a_TestType Test; \
            Test.fEflIn   = RandEFlags(); \
            Test.fEflOut  = Test.fEflIn; \
            Test.uDst1In  = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst1Out = Test.uDst1In; \
            Test.uDst2In  = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst2Out = Test.uDst2In; \
            Test.uSrcIn   = RandU ## a_cBits ## Src(iTest); \
            Test.rc       = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", %d }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDst1In, Test.uDst1Out, Test.uDst2In, Test.uDst2Out, Test.uSrcIn, \
                         Test.rc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1957
1958	#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
1959	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
1960	static a_SubTestType const a_aSubTests [] = \
1961	{ \
1962	ENTRY_AMD_EX(mul_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0), \
1963	ENTRY_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0), \
1964	ENTRY_AMD_EX(imul_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0), \
1965	ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF, 0), \
1966	ENTRY_AMD_EX(div_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0), \
1967	ENTRY_INTEL_EX(div_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0), \
1968	ENTRY_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0), \
1969	ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF \| X86_EFL_OF, 0), \
1970	}; \
1971	\
1972	GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
1973	\
1974	static void MulDivU ## a_cBits ## Test(void) \
1975	{ \
1976	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
1977	{ \
1978	RTTestSub(g_hTest, a_aSubTests[iFn].pszName); \
1979	a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
1980	uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
1981	uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
1982	PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
1983	uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
1984	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1985	for (uint32_t iVar = 0; iVar < cVars; iVar++) \
1986	{ \
1987	for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1988	{ \
1989	uint32_t fEfl = paTests[iTest].fEflIn; \
1990	a_Type uDst1 = paTests[iTest].uDst1In; \
1991	a_Type uDst2 = paTests[iTest].uDst2In; \
1992	int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
1993	if ( uDst1 != paTests[iTest].uDst1Out \
1994	\|\| uDst2 != paTests[iTest].uDst2Out \
1995	\|\| (fEfl \| fEflIgn) != (paTests[iTest].fEflOut \| fEflIgn)\
1996	\|\| rc != paTests[iTest].rc) \
1997	RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
1998	" -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
1999	"expected %#08x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
2000	iTest, iVar == 0 ? "" : "/n", \
2001	paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
2002	fEfl, uDst1, uDst2, rc, \
2003	paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
2004	EFlagsDiff(fEfl \| fEflIgn, paTests[iTest].fEflOut \| fEflIgn), \
2005	uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
2006	(fEfl \| fEflIgn) != (paTests[iTest].fEflOut \| fEflIgn) ? " eflags" : ""); \
2007	else \
2008	{ \
2009	*g_pu ## a_cBits = paTests[iTest].uDst1In; \
2010	*g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
2011	*g_pfEfl = paTests[iTest].fEflIn; \
2012	rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
2013	RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
2014	RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
2015	RTTEST_CHECK(g_hTest, (*g_pfEfl \| fEflIgn) == (paTests[iTest].fEflOut \| fEflIgn)); \
2016	RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
2017	} \
2018	} \
2019	pfn = a_aSubTests[iFn].pfnNative; \
2020	} \
2021	} \
2022	}
2023	TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
2024	TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
2025	TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2026
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates multiplication and division test data for the 8, 16, 32 and 64-bit operand widths.
 *
 * @param   pOut    Stream passed on to each per-width generator.
 * @param   cTests  Test count passed on to each per-width generator.
 */
static void MulDivGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    MulDivU8Generate( pOut, cTests);
    MulDivU16Generate(pOut, cTests);
    MulDivU32Generate(pOut, cTests);
    MulDivU64Generate(pOut, cTests);
}
#endif
2036
2037	static void MulDivTest(void)
2038	{
2039	MulDivU8Test();
2040	MulDivU16Test();
2041	MulDivU32Test();
2042	MulDivU64Test();
2043	}
2044
2045
2046	/*
2047	* BSWAP
2048	*/
2049	static void BswapTest(void)
2050	{
2051	RTTestSub(g_hTest, "bswap_u16");
2052	*g_pu32 = UINT32_C(0x12345678);
2053	iemAImpl_bswap_u16(g_pu32);
2054	#if 0
2055	RTTEST_CHECK_MSG(g_hTest, g_pu32 == UINT32_C(0x12347856), (g_hTest, "g_pu32=%#RX32\n", *g_pu32));
2056	#else
2057	RTTEST_CHECK_MSG(g_hTest, g_pu32 == UINT32_C(0x12340000), (g_hTest, "g_pu32=%#RX32\n", *g_pu32));
2058	#endif
2059	*g_pu32 = UINT32_C(0xffff1122);
2060	iemAImpl_bswap_u16(g_pu32);
2061	#if 0
2062	RTTEST_CHECK_MSG(g_hTest, g_pu32 == UINT32_C(0xffff2211), (g_hTest, "g_pu32=%#RX32\n", *g_pu32));
2063	#else
2064	RTTEST_CHECK_MSG(g_hTest, g_pu32 == UINT32_C(0xffff0000), (g_hTest, "g_pu32=%#RX32\n", *g_pu32));
2065	#endif
2066
2067	RTTestSub(g_hTest, "bswap_u32");
2068	*g_pu32 = UINT32_C(0x12345678);
2069	iemAImpl_bswap_u32(g_pu32);
2070	RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
2071
2072	RTTestSub(g_hTest, "bswap_u64");
2073	*g_pu64 = UINT64_C(0x0123456789abcdef);
2074	iemAImpl_bswap_u64(g_pu64);
2075	RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
2076	}
2077
2078
2079
2080	/*********************************************************************************************************************************
2081	* Floating point (x87 style) *
2082	*********************************************************************************************************************************/
2083
2084	/*
2085	* FPU constant loading.
2086	*/
2087	TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);
2088
2089	static const FPU_LD_CONST_T g_aFpuLdConst[] =
2090	{
2091	ENTRY(fld1),
2092	ENTRY(fldl2t),
2093	ENTRY(fldl2e),
2094	ENTRY(fldpi),
2095	ENTRY(fldlg2),
2096	ENTRY(fldln2),
2097	ENTRY(fldz),
2098	};
2099
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes an FPU_LD_CONST_TEST_T array of generated test data for each entry in
 * g_aFpuLdConst.
 *
 * Random FCW and FSW values are drawn once per group of four entries; the
 * entry's pfn worker is then run once for each of the four rounding control
 * settings.
 *
 * @param   pOut    Stream the generated arrays are written to.
 * @param   cTests  Upper bound for the test index, which advances in steps of four.
 */
static void FpuLdConstGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdConst[iFn].pszName, "FPU_LD_CONST_TEST_T");
        uint32_t iTest = 0;
        while (iTest < cTests)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRc = 0; iRc < 4; iRc++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRc << X86_FCW_RC_SHIFT);
                g_aFpuLdConst[iFn].pfn(&State, &Res);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s }, /* #%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), iTest + iRc);
            }
            iTest += 4;
        }
        GenerateArrayEnd(pOut, g_aFpuLdConst[iFn].pszName);
    }
}
#endif
2126
2127	static void FpuLoadConstTest(void)
2128	{
2129	/*
2130	* Inputs:
2131	* - FSW: C0, C1, C2, C3
2132	* - FCW: Exception masks, Precision control, Rounding control.
2133	*
2134	* C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
2135	*/
2136	X86FXSTATE State;
2137	RT_ZERO(State);
2138	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2139	{
2140	RTTestSub(g_hTest, g_aFpuLdConst[iFn].pszName);
2141
2142	uint32_t const cTests = *g_aFpuLdConst[iFn].pcTests;
2143	FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
2144	PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
2145	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
2146	if (!cTests) RTTestSkipped(g_hTest, "no tests");
2147	for (uint32_t iVar = 0; iVar < cVars; iVar++)
2148	{
2149	for (uint32_t iTest = 0; iTest < cTests; iTest++)
2150	{
2151	State.FCW = paTests[iTest].fFcw;
2152	State.FSW = paTests[iTest].fFswIn;
2153	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2154	pfn(&State, &Res);
2155	if ( Res.FSW != paTests[iTest].fFswOut
2156	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2157	RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
2158	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2159	Res.FSW, FormatR80(&Res.r80Result),
2160	paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2161	FswDiff(Res.FSW, paTests[iTest].fFswOut),
2162	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2163	FormatFcw(paTests[iTest].fFcw) );
2164	}
2165	pfn = g_aFpuLdConst[iFn].pfnNative;
2166	}
2167	}
2168	}
2169
2170
2171	/*
2172	* Load floating point values from memory.
2173	*/
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines FpuLdR<bits>Generate, which writes an array of generated test data
 * for each entry in @a a_aSubTests.
 *
 * For every test index a random FCW, FSW and input value are drawn, and the
 * entry's pfn worker is run once for each of the four rounding control
 * settings, giving four array entries per index.  Expands to nothing when the
 * generator is not compiled in.
 *
 * @param   a_cBits         Width of the input value in bits, used for name pasting.
 * @param   a_rdTypeIn      Type of the input value.
 * @param   a_aSubTests     The sub-test table to generate data for.
 * @param   a_TestType      Test data entry type; printed as the array type name.
 */
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
static void FpuLdR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
            \
            uint16_t iRc = 0; \
            while (iRc < 4) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRc << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n", \
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), \
                             GenFormatR ## a_cBits(&InVal), iTest, iRc); \
                iRc++; \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
#endif
2205
2206	#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
2207	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
2208	typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
2209	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
2210	\
2211	static const a_SubTestType a_aSubTests[] = \
2212	{ \
2213	ENTRY(RT_CONCAT(fld_r80_from_r,a_cBits)) \
2214	}; \
2215	GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
2216	\
2217	static void FpuLdR ## a_cBits ## Test(void) \
2218	{ \
2219	X86FXSTATE State; \
2220	RT_ZERO(State); \
2221	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2222	{ \
2223	RTTestSub(g_hTest, a_aSubTests[iFn].pszName); \
2224	\
2225	uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2226	a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2227	PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2228	uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2229	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2230	for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2231	{ \
2232	for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2233	{ \
2234	a_rdTypeIn const InVal = paTests[iTest].InVal; \
2235	State.FCW = paTests[iTest].fFcw; \
2236	State.FSW = paTests[iTest].fFswIn; \
2237	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2238	pfn(&State, &Res, &InVal); \
2239	if ( Res.FSW != paTests[iTest].fFswOut \
2240	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
2241	RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
2242	"%s -> fsw=%#06x %s\n" \
2243	"%s expected %#06x %s%s%s (%s)\n", \
2244	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
2245	FormatR ## a_cBits(&paTests[iTest].InVal), \
2246	iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
2247	iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
2248	FswDiff(Res.FSW, paTests[iTest].fFswOut), \
2249	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
2250	FormatFcw(paTests[iTest].fFcw) ); \
2251	} \
2252	pfn = a_aSubTests[iFn].pfnNative; \
2253	} \
2254	} \
2255	}
2256
2257	TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
2258	TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
2259	TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
2260
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates floating point load test data for the 80, 64 and 32-bit input formats.
 *
 * @param   pOut    Stream passed on to each per-format generator.
 * @param   cTests  Test count passed on to each per-format generator.
 */
static void FpuLdMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuLdR80Generate(pOut, cTests);
    FpuLdR64Generate(pOut, cTests);
    FpuLdR32Generate(pOut, cTests);
}
#endif
2269
2270	static void FpuLdMemTest(void)
2271	{
2272	FpuLdR80Test();
2273	FpuLdR64Test();
2274	FpuLdR32Test();
2275	}
2276
2277
2278	/*
2279	* Load integer values from memory.
2280	*/
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines FpuLdI<bits>Generate, which writes an array of generated test data
 * for each entry in @a a_aSubTests.
 *
 * For every test index a random FCW, FSW and integer input are drawn, and the
 * entry's pfn worker is run once for each of the four rounding control
 * settings, giving four array entries per index.  Expands to nothing when the
 * generator is not compiled in.
 *
 * @param   a_cBits         Width of the integer input in bits, used for name pasting.
 * @param   a_iTypeIn       Integer type of the input value.
 * @param   a_szFmtIn       Format string literal for printing the input value.
 * @param   a_aSubTests     The sub-test table to generate data for.
 * @param   a_TestType      Test data entry type; printed as the array type name.
 */
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
static void FpuLdI ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
            \
            uint16_t iRc = 0; \
            while (iRc < 4) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRc << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, " a_szFmtIn " }, /* #%u/%u */\n", \
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), InVal, iTest, iRc); \
                iRc++; \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
#endif
2311
2312	#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
2313	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
2314	typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
2315	TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
2316	\
2317	static const a_SubTestType a_aSubTests[] = \
2318	{ \
2319	ENTRY(RT_CONCAT(fild_r80_from_i,a_cBits)) \
2320	}; \
2321	GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
2322	\
2323	static void FpuLdI ## a_cBits ## Test(void) \
2324	{ \
2325	X86FXSTATE State; \
2326	RT_ZERO(State); \
2327	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2328	{ \
2329	RTTestSub(g_hTest, a_aSubTests[iFn].pszName); \
2330	\
2331	uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2332	a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2333	PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2334	uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2335	if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2336	for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2337	{ \
2338	for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2339	{ \
2340	a_iTypeIn const iInVal = paTests[iTest].iInVal; \
2341	State.FCW = paTests[iTest].fFcw; \
2342	State.FSW = paTests[iTest].fFswIn; \
2343	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2344	pfn(&State, &Res, &iInVal); \
2345	if ( Res.FSW != paTests[iTest].fFswOut \
2346	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
2347	RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
2348	"%s -> fsw=%#06x %s\n" \
2349	"%s expected %#06x %s%s%s (%s)\n", \
2350	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
2351	iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
2352	iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
2353	FswDiff(Res.FSW, paTests[iTest].fFswOut), \
2354	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
2355	FormatFcw(paTests[iTest].fFcw) ); \
2356	} \
2357	pfn = a_aSubTests[iFn].pfnNative; \
2358	} \
2359	} \
2360	}
2361
2362	TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
2363	TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
2364	TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
2365
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates integer load test data for the 64, 32 and 16-bit input widths.
 *
 * @param   pOut    Stream passed on to each per-width generator.
 * @param   cTests  Test count passed on to each per-width generator.
 */
static void FpuLdIntGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuLdI64Generate(pOut, cTests);
    FpuLdI32Generate(pOut, cTests);
    FpuLdI16Generate(pOut, cTests);
}
#endif
2374
2375	static void FpuLdIntTest(void)
2376	{
2377	FpuLdI64Test();
2378	FpuLdI32Test();
2379	FpuLdI16Test();
2380	}
2381
2382
2383	/*
2384	* Load binary coded decimal values from memory.
2385	*/
2386	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
2387	typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
2388	TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);
2389
2390	static const FPU_LD_D80_T g_aFpuLdD80[] =
2391	{
2392	ENTRY(fld_r80_from_d80)
2393	};
2394
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes an FPU_D80_IN_TEST_T array of generated test data for each entry in
 * g_aFpuLdD80.
 *
 * For every test index a random FCW, FSW and packed BCD input are drawn, and
 * the entry's pfn worker is run once for each of the four rounding control
 * settings, giving four array entries per index.
 *
 * @param   pOut    Stream the generated arrays are written to.
 * @param   cTests  Number of test indexes to generate per array.
 */
static void FpuLdD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdD80[iFn].pszName, "FPU_D80_IN_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();
            RTPBCD80U InVal = RandD80Src(iTest);

            uint16_t iRc = 0;
            while (iRc < 4)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRc << X86_FCW_RC_SHIFT);
                g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), GenFormatD80(&InVal),
                             iTest, iRc);
                iRc++;
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdD80[iFn].pszName);
    }
}
#endif
2423
2424	static void FpuLdD80Test(void)
2425	{
2426	X86FXSTATE State;
2427	RT_ZERO(State);
2428	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
2429	{
2430	RTTestSub(g_hTest, g_aFpuLdD80[iFn].pszName);
2431
2432	uint32_t const cTests = *g_aFpuLdD80[iFn].pcTests;
2433	FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
2434	PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
2435	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
2436	if (!cTests) RTTestSkipped(g_hTest, "no tests");
2437	for (uint32_t iVar = 0; iVar < cVars; iVar++)
2438	{
2439	for (uint32_t iTest = 0; iTest < cTests; iTest++)
2440	{
2441	RTPBCD80U const InVal = paTests[iTest].InVal;
2442	State.FCW = paTests[iTest].fFcw;
2443	State.FSW = paTests[iTest].fFswIn;
2444	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2445	pfn(&State, &Res, &InVal);
2446	if ( Res.FSW != paTests[iTest].fFswOut
2447	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2448	RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
2449	"%s -> fsw=%#06x %s\n"
2450	"%s expected %#06x %s%s%s (%s)\n",
2451	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2452	FormatD80(&paTests[iTest].InVal),
2453	iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
2454	iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2455	FswDiff(Res.FSW, paTests[iTest].fFswOut),
2456	!RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2457	FormatFcw(paTests[iTest].fFcw) );
2458	}
2459	pfn = g_aFpuLdD80[iFn].pfnNative;
2460	}
2461	}
2462	}
2463
2464
/*
 * Store floating point values to memory.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Extra 80-bit inputs for the 32-bit store generator, used after the random ones. */
static const RTFLOAT80U g_aFpuStR32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
};
/** Extra 80-bit inputs for the 64-bit store generator, used after the random ones. */
static const RTFLOAT80U g_aFpuStR64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
};
/** Extra 80-bit inputs for the 80-bit store generator, used after the random ones. */
static const RTFLOAT80U g_aFpuStR80Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
};

/**
 * Defines FpuStR<bits>Generate, which writes an array of generated test data
 * for each entry in @a a_aSubTests.
 *
 * The inputs are cTests random 80-bit values followed by the values in
 * g_aFpuStR<bits>Specials.  Each input is run through the entry's pfn worker
 * for all four rounding control settings combined with all eight combinations
 * of the OM, UM and PM mask bits.  The output buffer is filled with 0xfe bytes
 * before each call.  Expands to nothing when the generator is not compiled in.
 *
 * @param   a_cBits         Width of the stored value in bits, used for name pasting.
 * @param   a_rdType        Type of the stored value.
 * @param   a_aSubTests     The sub-test table to generate data for.
 * @param   a_TestType      Test data entry type; printed as the array type name.
 */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
static void FpuStR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                /* Stepping by two keeps bit 0 of iMask clear, so only bits 1 thru 3 (OM, UM, PM) vary. */ \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_rdType OutVal; \
                    RT_ZERO(OutVal); \
                    memset(&OutVal, 0xfe, sizeof(OutVal)); \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatR ## a_cBits(&OutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
#endif
2530
/**
 * Generates everything needed to test storing an 80-bit FPU value as an
 * @a a_cBits wide floating point value: the worker function typedefs, the
 * subtest table @a a_aSubTests (a single fst_r80_to_r<bits> entry), the
 * generator (via GEN_FPU_STORE) and the FpuStR<bits>Test() driver.
 *
 * The driver feeds each recorded test (FCW, input FSW, input value) to the
 * worker and reports a failure when the returned FSW or the stored value
 * differs from the recorded expectation.  Variations after the first one use
 * the pfnNative worker and are tagged "/n" in the failure message.
 *
 * @param   a_cBits         Width of the destination type (80, 64 and 32 are instantiated in this file).
 * @param   a_rdType        The destination value type.
 * @param   a_SubTestType   Name to give the subtest table entry type.
 * @param   a_aSubTests     Name to give the subtest table.
 * @param   a_TestType      The recorded test data entry type.
 */
#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
                                                                  PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fst_r80_to_r,a_cBits)) \
}; \
GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
\
static void FpuStR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        RTTestSub(g_hTest, a_aSubTests[iFn].pszName); \
        \
        uint32_t const                    cTests  = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const          paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn     = a_aSubTests[iFn].pfn; \
        uint32_t const                    cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal   = paTests[iTest].InVal; \
                uint16_t         uFswOut = 0; \
                a_rdType         OutVal; \
                /* Fill the output with 0xfe before the call (presumably so bytes the \
                   worker leaves untouched stand out); no separate zeroing is needed. */ \
                memset(&OutVal, 0xfe, sizeof(OutVal)); \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &OutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Any further variation exercises the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2585
2586	TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
2587	TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
2588	TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
2589
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the generators for all floating point store widths (80, 64, 32 bits),
 * in that order.
 *
 * @param   pOut    Output stream, handed on to each generator.
 * @param   cTests  Test count, handed on to each generator.
 */
static void FpuStMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuStR80Generate(pOut, cTests);
    FpuStR64Generate(pOut, cTests);
    FpuStR32Generate(pOut, cTests);
}
#endif
2598
2599	static void FpuStMemTest(void)
2600	{
2601	FpuStR80Test();
2602	FpuStR64Test();
2603	FpuStR32Test();
2604	}
2605
2606
2607	/*
2608	* Store integer values to memory or register.
2609	*/
2610	TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
2611	TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
2612	TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);
2613
2614	static const FPU_ST_I16_T g_aFpuStI16[] =
2615	{
2616	ENTRY(fist_r80_to_i16),
2617	ENTRY_AMD( fistt_r80_to_i16, 0),
2618	ENTRY_INTEL(fistt_r80_to_i16, 0),
2619	};
2620	static const FPU_ST_I32_T g_aFpuStI32[] =
2621	{
2622	ENTRY(fist_r80_to_i32),
2623	ENTRY(fistt_r80_to_i32),
2624	};
2625	static const FPU_ST_I64_T g_aFpuStI64[] =
2626	{
2627	ENTRY(fist_r80_to_i64),
2628	ENTRY(fistt_r80_to_i64),
2629	};
2630
2631	#ifdef TSTIEMAIMPL_WITH_GENERATOR
2632	static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
2633	{
2634	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
2635	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
2636	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2637	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2638	RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
2639	RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
2640	RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
2641	RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
2642	RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
2643	RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
2644	RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
2645	RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
2646	RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
2647	RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
2648	RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
2649	RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2650	RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2651	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
2652	RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
2653	RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
2654	RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
2655	RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2656	RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
2657	RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
2658	RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2659	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
2660	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
2661	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
2662	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
2663	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
2664	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
2665	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
2666	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
2667	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
2668	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
2669	RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
2670	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
2671	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
2672	RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
2673	RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
2674	RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
2675	RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
2676	RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
2677	RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
2678	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
2679	RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
2680	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
2681	};
2682	static const RTFLOAT80U g_aFpuStI32Specials[] =
2683	{
2684	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
2685	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
2686	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
2687	RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
2688	RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
2689	RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
2690	RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
2691	RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
2692	RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
2693	RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
2694	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
2695	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
2696	RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
2697	RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
2698	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
2699	RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
2700	};
2701	static const RTFLOAT80U g_aFpuStI64Specials[] =
2702	{
2703	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
2704	RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
2705	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
2706	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
2707	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
2708	RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
2709	RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
2710	RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
2711	RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
2712	RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
2713	RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
2714	RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
2715	RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
2716	RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
2717	RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
2718	RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
2719	RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
2720	};
2721
2722	# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
2723	static void FpuStI ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
2724	{ \
2725	X86FXSTATE State; \
2726	RT_ZERO(State); \
2727	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2728	{ \
2729	PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
2730	? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
2731	PRTSTREAM pOutFn = pOut; \
2732	if (a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
2733	{ \
2734	if (a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2735	continue; \
2736	pOutFn = pOutCpu; \
2737	} \
2738	\
2739	GenerateArrayStart(pOutFn, a_aSubTests[iFn].pszName, #a_TestType); \
2740	uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
2741	for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
2742	{ \
2743	uint16_t const fFcw = RandFcw(); \
2744	State.FSW = RandFsw(); \
2745	RTFLOAT80U const InVal = iTest < cTests ? RandR80Ex(a_cBits, true) \
2746	: g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
2747	\
2748	for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
2749	{ \
2750	/* PC doesn't influence these, so leave as is. */ \
2751	AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
2752	for (uint16_t iMask = 0; iMask < 16; iMask += 2 /1/) \
2753	{ \
2754	uint16_t uFswOut = 0; \
2755	a_iType iOutVal = ~(a_iType)2; \
2756	State.FCW = (fFcw & ~(X86_FCW_RC_MASK \| X86_FCW_OM \| X86_FCW_UM \| X86_FCW_PM)) \
2757	\| (iRounding << X86_FCW_RC_SHIFT); \
2758	/if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;/ \
2759	State.FCW \|= (iMask >> 1) << X86_FCW_OM_BIT; \
2760	pfn(&State, &uFswOut, &iOutVal, &InVal); \
2761	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
2762	State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
2763	GenFormatI ## a_cBits(iOutVal), iTest, iRounding, iMask); \
2764	} \
2765	} \
2766	} \
2767	GenerateArrayEnd(pOutFn, a_aSubTests[iFn].pszName); \
2768	} \
2769	}
2770	#else
2771	# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
2772	#endif
2773
/**
 * Emits the generator (via GEN_FPU_STORE_INT) and the FpuStI<bits>Test()
 * driver for storing an 80-bit FPU value as an @a a_cBits wide integer.
 *
 * The driver feeds each recorded test (FCW, input FSW, input value) to the
 * worker and reports a failure when the returned FSW or the stored integer
 * differs from the recorded expectation.  Variations after the first one use
 * the pfnNative worker and are tagged "/n" in the failure message.
 *
 * @param   a_cBits         Integer width (64, 32 and 16 are instantiated in this file).
 * @param   a_iType         The integer output type.
 * @param   a_szFmt         Format specifier string literal for printing an @a a_iType value.
 * @param   a_SubTestType   The subtest table entry type (not referenced by the expansion).
 * @param   a_aSubTests     The subtest table to run.
 * @param   a_TestType      The recorded test data entry type.
 */
#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
\
static void FpuStI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        RTTestSub(g_hTest, a_aSubTests[iFn].pszName); \
        \
        uint32_t const                    cTests  = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const          paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn     = a_aSubTests[iFn].pfn; \
        uint32_t const                    cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal   = paTests[iTest].InVal; \
                uint16_t         uFswOut = 0; \
                a_iType          iOutVal = ~(a_iType)2; /* same initial value as the generator uses */ \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &iOutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || iOutVal != paTests[iTest].iOutVal) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x " a_szFmt "\n" \
                                          "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? " " : "", uFswOut, iOutVal, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Any further variation exercises the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2816
2817	//fistt_r80_to_i16 diffs for AMD, of course :-)
2818
2819	TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
2820	TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
2821	TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
2822
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the generators for all integer store widths (64, 32, 16 bits), in that
 * order.
 *
 * @param   pOut    Output stream for the common test data.
 * @param   pOutCpu Output stream for the CPU flavour specific test data.
 * @param   cTests  Number of random tests per subtest (specials come on top).
 */
static void FpuStIntGenerate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    FpuStI64Generate(pOut, pOutCpu, cTests);
    FpuStI32Generate(pOut, pOutCpu, cTests);
    FpuStI16Generate(pOut, pOutCpu, cTests);
}
#endif
2831
2832	static void FpuStIntTest(void)
2833	{
2834	FpuStI64Test();
2835	FpuStI32Test();
2836	FpuStI16Test();
2837	}
2838
2839
2840	/*
2841	* Store as packed BCD value (memory).
2842	*/
2843	typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
2844	typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
2845	TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
2846
2847	static const FPU_ST_D80_T g_aFpuStD80[] =
2848	{
2849	ENTRY(fst_r80_to_d80),
2850	};
2851
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes test data arrays for the packed BCD store workers in g_aFpuStD80.
 *
 * Inputs are @a cTests random values followed by the special values around
 * the packed BCD limits; each input is run with all 4 rounding modes and all
 * 8 combinations of the OM, UM and PM mask bits.  The pfn worker of each
 * entry is always the one called here.
 *
 * @param   pOut    Stream the test arrays are written to.
 * @param   cTests  Number of random inputs (the specials come on top).
 */
static void FpuStD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuStD80[iFn].pszName, "FPU_ST_D80_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Ex(59, true) : s_aSpecials[iTest - cTests];

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                /* PC doesn't influence these, so leave as is. */
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
                /* Step by 2: bit 0 of iMask belongs to the disabled mask toggling below. */
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
                {
                    uint16_t  uFswOut = 0;
                    RTPBCD80U OutVal  = RTPBCD80U_INIT_ZERO(0);
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
                              | (iRounding << X86_FCW_RC_SHIFT);
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
                    /* iMask >> 1 runs 0..7, i.e. every OM/UM/PM combination. */
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
                    g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n",
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal),
                                 GenFormatD80(&OutVal), iTest, iRounding, iMask);
                }
            }
        }
        GenerateArrayEnd(pOut, g_aFpuStD80[iFn].pszName);
    }
}
#endif
2905
2906
2907	static void FpuStD80Test(void)
2908	{
2909	X86FXSTATE State;
2910	RT_ZERO(State);
2911	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
2912	{
2913	RTTestSub(g_hTest, g_aFpuStD80[iFn].pszName);
2914
2915	uint32_t const cTests = *g_aFpuStD80[iFn].pcTests;
2916	FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
2917	PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
2918	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
2919	if (!cTests) RTTestSkipped(g_hTest, "no tests");
2920	for (uint32_t iVar = 0; iVar < cVars; iVar++)
2921	{
2922	for (uint32_t iTest = 0; iTest < cTests; iTest++)
2923	{
2924	RTFLOAT80U const InVal = paTests[iTest].InVal;
2925	uint16_t uFswOut = 0;
2926	RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
2927	State.FCW = paTests[iTest].fFcw;
2928	State.FSW = paTests[iTest].fFswIn;
2929	pfn(&State, &uFswOut, &OutVal, &InVal);
2930	if ( uFswOut != paTests[iTest].fFswOut
2931	\|\| !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
2932	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
2933	"%s -> fsw=%#06x %s\n"
2934	"%s expected %#06x %s%s%s (%s)\n",
2935	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2936	FormatR80(&paTests[iTest].InVal),
2937	iVar ? " " : "", uFswOut, FormatD80(&OutVal),
2938	iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
2939	FswDiff(uFswOut, paTests[iTest].fFswOut),
2940	RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
2941	FormatFcw(paTests[iTest].fFcw) );
2942	}
2943	pfn = g_aFpuStD80[iFn].pfnNative;
2944	}
2945	}
2946	}
2947
2948
2949
2950	/*********************************************************************************************************************************
2951	* x87 FPU Binary Operations *
2952	*********************************************************************************************************************************/
2953
2954	/*
2955	* Binary FPU operations on two 80-bit floating point values.
2956	*/
2957	TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
2958
2959	static const FPU_BINARY_R80_T g_aFpuBinaryR80[] =
2960	{
2961	ENTRY(fadd_r80_by_r80),
2962	ENTRY(fsub_r80_by_r80),
2963	ENTRY(fsubr_r80_by_r80),
2964	ENTRY(fmul_r80_by_r80),
2965	ENTRY(fdiv_r80_by_r80),
2966	ENTRY(fdivr_r80_by_r80),
2967	ENTRY(fprem_r80_by_r80),
2968	ENTRY(fprem1_r80_by_r80),
2969	ENTRY(fscale_r80_by_r80),
2970	ENTRY_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
2971	ENTRY_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
2972	ENTRY_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
2973	ENTRY_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
2974	ENTRY_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
2975	ENTRY_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
2976	};
2977
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes test data arrays for the binary 80-bit by 80-bit workers in
 * g_aFpuBinaryR80.
 *
 * For each subtest it uses pfnNative when set and pfn otherwise.  Entries
 * whose idxCpuEflFlavour is not IEMTARGETCPU_EFL_BEHAVIOR_NATIVE are skipped
 * unless it equals g_idxCpuEflFlavour, and then go to @a pOutCpu instead of
 * @a pOut.  Each input pair is run with all 4 rounding modes, all 4 precision
 * settings, and with the exception mask bits all clear and all set.
 *
 * @param   pOut    Stream for the common test data.
 * @param   pOutCpu Stream for the CPU flavour specific test data.
 * @param   cTests  Number of random input pairs (the specials come on top).
 */
static void FpuBinaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
          RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = cTests / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
    {
        PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }

        GenerateArrayStart(pOutFn, g_aFpuBinaryR80[iFn].pszName, "FPU_BINARY_R80_TEST_T");
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Ex() : s_aSpecials[iTest - cTests].Val1;
            RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Ex() : s_aSpecials[iTest - cTests].Val2;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Short on normal pairs and within the last cMinNormalPairs random
                   slots: redraw this slot (the loop increment undoes the decrement). */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* Two passes: iMask = 0 and iMask = X86_FCW_MASK_ALL. */
                    for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
                    {
                        State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                                  | (iRounding << X86_FCW_RC_SHIFT)
                                  | (iPrecision << X86_FCW_PC_SHIFT)
                                  | iMask;
                        IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res, &InVal1, &InVal2);
                        RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n",
                                     State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                     GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
                    }
                }
            }
        }
        GenerateArrayEnd(pOutFn, g_aFpuBinaryR80[iFn].pszName);
    }
}
#endif
3041
3042
3043	static void FpuBinaryR80Test(void)
3044	{
3045	X86FXSTATE State;
3046	RT_ZERO(State);
3047	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3048	{
3049	RTTestSub(g_hTest, g_aFpuBinaryR80[iFn].pszName);
3050
3051	uint32_t const cTests = *g_aFpuBinaryR80[iFn].pcTests;
3052	FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
3053	PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
3054	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
3055	if (!cTests) RTTestSkipped(g_hTest, "no tests");
3056	for (uint32_t iVar = 0; iVar < cVars; iVar++)
3057	{
3058	for (uint32_t iTest = 0; iTest < cTests; iTest++)
3059	{
3060	RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3061	RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3062	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3063	State.FCW = paTests[iTest].fFcw;
3064	State.FSW = paTests[iTest].fFswIn;
3065	pfn(&State, &Res, &InVal1, &InVal2);
3066	if ( Res.FSW != paTests[iTest].fFswOut
3067	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
3068	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3069	"%s -> fsw=%#06x %s\n"
3070	"%s expected %#06x %s%s%s (%s)\n",
3071	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3072	FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3073	iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3074	iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
3075	FswDiff(Res.FSW, paTests[iTest].fFswOut),
3076	RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
3077	FormatFcw(paTests[iTest].fFcw) );
3078	}
3079	pfn = g_aFpuBinaryR80[iFn].pfnNative;
3080	}
3081	}
3082	}
3083
3084
3085	/*
3086	* Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit one.
3087	*/
/* Integer counterparts of the floating point xxx_IS_NORMAL predicates, for
   macros that paste "<type>_IS_NORMAL": the argument is ignored and the
   result is always 1. */
#define int16_t_IS_NORMAL(a)    1
#define int32_t_IS_NORMAL(a)    1
#define int64_t_IS_NORMAL(a)    1
3091
3092	#ifdef TSTIEMAIMPL_WITH_GENERATOR
3093	static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
3094	{
3095	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3096	RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
3097	};
3098	static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
3099	{
3100	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3101	RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
3102	};
3103	static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
3104	{
3105	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
3106	};
3107	static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
3108	{
3109	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
3110	};
3111
3112	# define GEN_FPU_BINARY_SMALL(a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
3113	static void FpuBinary ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
3114	{ \
3115	X86FXSTATE State; \
3116	RT_ZERO(State); \
3117	uint32_t cMinNormalPairs = cTests / 4; \
3118	for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3119	{ \
3120	GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
3121	uint32_t cNormalInputPairs = 0; \
3122	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
3123	{ \
3124	RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Ex() \
3125	: s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
3126	a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src(a_cBits) \
3127	: s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
3128	if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
3129	cNormalInputPairs++; \
3130	else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
3131	{ \
3132	iTest -= 1; \
3133	continue; \
3134	} \
3135	\
3136	uint16_t const fFcw = RandFcw(); \
3137	State.FSW = RandFsw(); \
3138	\
3139	for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
3140	{ \
3141	for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
3142	{ \
3143	for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
3144	{ \
3145	State.FCW = (fFcw & ~(X86_FCW_RC_MASK \| X86_FCW_PC_MASK \| X86_FCW_MASK_ALL)) \
3146	\| (iRounding << X86_FCW_RC_SHIFT) \
3147	\| (iPrecision << X86_FCW_PC_SHIFT) \
3148	\| iMask; \
3149	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
3150	a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
3151	RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n", \
3152	State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
3153	GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u'); \
3154	} \
3155	} \
3156	} \
3157	} \
3158	GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
3159	} \
3160	}
3161	#else
3162	# define GEN_FPU_BINARY_SMALL(a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
3163	#endif
3164
/**
 * Generates the subtest table, the generator (via GEN_FPU_BINARY_SMALL) and
 * the FpuBinary<a_UpBits>Test() driver for the binary operations taking an
 * 80-bit value and a smaller second operand (add, mul, sub, subr, div, divr).
 *
 * The driver feeds each recorded test (FCW, input FSW, two input values) to
 * the worker and reports a failure when the resulting FSW or 80-bit value
 * differs from the recorded expectation.  Variations after the first one use
 * the pfnNative worker and are tagged "/n" in the failure message.
 *
 * @param   a_cBits         Width of the second operand; passed on to the generator.
 * @param   a_LoBits        Lower case operand tag (r64, r32, i32, i16) used in worker names.
 * @param   a_UpBits        Upper case operand tag (R64, R32, I32, I16) used in type and helper names.
 * @param   a_I             Pasted after the leading 'f' of the worker names ('i' for
 *                          the integer variants, RT_NOTHING otherwise).
 * @param   a_Type2         Type of the second operand.
 * @param   a_SubTestType   Name to give the subtest table entry type.
 * @param   a_aSubTests     Name to give the subtest table.
 * @param   a_TestType      The recorded test data entry type.
 */
#define TEST_FPU_BINARY_SMALL(a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
}; \
\
GEN_FPU_BINARY_SMALL(a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinary ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        RTTestSub(g_hTest, a_aSubTests[iFn].pszName); \
        \
        uint32_t const              cTests  = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const    paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits  pfn     = a_aSubTests[iFn].pfn; \
        uint32_t const              cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const    InVal2 = paTests[iTest].InVal2; \
                IEMFPURESULT     Res    = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &Res, &InVal1, &InVal2); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 /* Flag a value mismatch, i.e. when the two are NOT identical. */ \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Any further variation exercises the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3220
3221	TEST_FPU_BINARY_SMALL(64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
3222	TEST_FPU_BINARY_SMALL(32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
3223	TEST_FPU_BINARY_SMALL(32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
3224	TEST_FPU_BINARY_SMALL(16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
3225
3226
3227	/*
3228	* Binary operations on 80-, 64- and 32-bit floating point only affecting FSW.
3229	*/
3230	#ifdef TSTIEMAIMPL_WITH_GENERATOR
3231	static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
3232	{
3233	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3234	RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
3235	};
3236	static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
3237	{
3238	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3239	RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
3240	};
3241	static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
3242	{
3243	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
3244	RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
3245	};
3246	static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
3247	{
3248	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
3249	};
3250	static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
3251	{
3252	{ RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
3253	};
3254
/**
 * Defines FpuBinaryFsw<a_UpBits>Generate(), which writes one test data array
 * to @a pOut for each entry in @a a_aSubTests.
 *
 * Every array holds @a cTests randomly drawn input pairs followed by the pairs
 * in s_aFpuBinaryFsw<a_UpBits>Specials.  Each pair is run twice: once with all
 * FCW exception mask bits clear ('u' in the row comment) and once with all of
 * them set ('c').  The resulting FSW is recorded in the emitted row.
 */
# define GEN_FPU_BINARY_FSW(a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinaryFsw ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    uint32_t const cWantedNormalPairs = cTests / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalPairs = 0; \
        uint32_t iTest        = 0; \
        while (iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials)) \
        { \
            /* Random inputs first, then the fixed special pairs. */ \
            bool const       fRandom = iTest < cTests; \
            RTFLOAT80U const InVal1  = fRandom ? RandR80Ex() \
                                     : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const    InVal2  = fRandom ? Rand ## a_UpBits ## Src(a_cBits) \
                                     : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
            \
            /* In the last cWantedNormalPairs random slots, redraw a pair that is not */ \
            /* all-normal for as long as the quota of normal pairs has not been met.  */ \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalPairs++; \
            else if (   fRandom \
                     && cNormalPairs < cWantedNormalPairs \
                     && iTest + cWantedNormalPairs >= cTests) \
                continue; /* iTest is not advanced, so this slot is retried. */ \
            \
            uint16_t const fFcw = RandFcw(); \
            FpuState.FSW = RandFsw(); \
            \
            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
            for (uint16_t fMask = 0; fMask <= X86_FCW_MASK_ALL; fMask += X86_FCW_MASK_ALL) \
            { \
                FpuState.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | fMask; \
                uint16_t fFswResult = 0; \
                a_aSubTests[iFn].pfn(&FpuState, &fFswResult, &InVal1, &InVal2); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%c */\n", \
                             FpuState.FCW, FpuState.FSW, fFswResult, \
                             GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                             iTest, fMask ? 'c' : 'u'); \
            } \
            iTest++; \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
3296	#else
/* Alternative branch of the conditional: no generator function is emitted. */
# define GEN_FPU_BINARY_FSW(a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
3298	#endif
3299
/**
 * Instantiates everything needed for one family of FSW-only binary FPU tests:
 * the sub-test type (via TYPEDEF_SUBTEST_TYPE), the sub-test table
 * @a a_aSubTests filled from the variadic arguments, the data generator (via
 * GEN_FPU_BINARY_FSW) and the FpuBinaryFsw<a_UpBits>Test() function.
 *
 * The test function replays every recorded row through the worker and reports
 * a failure when the produced FSW differs from the recorded one.  The first
 * variation runs the entry's pfn, later variations run its pfnNative.
 */
#define TEST_FPU_BINARY_FSW(a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    __VA_ARGS__ \
}; \
\
GEN_FPU_BINARY_FSW(a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinaryFsw ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE FpuState; \
    RT_ZERO(FpuState); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        RTTestSub(g_hTest, a_aSubTests[iFn].pszName); \
        \
        uint32_t const           cEntries    = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paEntries   = a_aSubTests[iFn].paTests; \
        uint32_t const           cVariations = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cEntries) \
            RTTestSkipped(g_hTest, "no tests"); \
        \
        for (uint32_t iVar = 0; iVar < cVariations; iVar++) \
        { \
            PFNIEMAIMPLFPU ## a_UpBits ## FSW pfnWorker = a_aSubTests[iFn].pfn; \
            if (iVar != 0) \
                pfnWorker = a_aSubTests[iFn].pfnNative; \
            \
            for (uint32_t iTest = 0; iTest < cEntries; iTest++) \
            { \
                a_TestType const * const pEntry = &paEntries[iTest]; \
                RTFLOAT80U const InVal1 = pEntry->InVal1; \
                a_Type2 const    InVal2 = pEntry->InVal2; \
                uint16_t         fFswActual = 0; \
                FpuState.FCW = pEntry->fFcw; \
                FpuState.FSW = pEntry->fFswIn; \
                pfnWorker(&FpuState, &fFswActual, &InVal1, &InVal2); \
                if (fFswActual == pEntry->fFswOut) \
                    continue; \
                RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                      "%s -> fsw=%#06x\n" \
                                      "%s expected %#06x %s (%s)\n", \
                             iTest, iVar ? "/n" : "", pEntry->fFcw, pEntry->fFswIn, \
                             FormatR80(&pEntry->InVal1), Format ## a_UpBits(&pEntry->InVal2), \
                             iVar ? " " : "", fFswActual, \
                             iVar ? " " : "", pEntry->fFswOut, \
                             FswDiff(fFswActual, pEntry->fFswOut), FormatFcw(pEntry->fFcw) ); \
            } \
        } \
    } \
}
3347
/* One instantiation per second-operand type.  Each expands to a sub-test
   table (g_aFpuBinaryFsw<X>), a FpuBinaryFsw<X>Test() function and whatever
   GEN_FPU_BINARY_FSW expands to for the generator. */
TEST_FPU_BINARY_FSW(80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY(fcom_r80_by_r80), ENTRY(fucom_r80_by_r80))
TEST_FPU_BINARY_FSW(64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY(fcom_r80_by_r64))
TEST_FPU_BINARY_FSW(32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY(fcom_r80_by_r32))
TEST_FPU_BINARY_FSW(32, I32, int32_t,    FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY(ficom_r80_by_i32))
TEST_FPU_BINARY_FSW(16, I16, int16_t,    FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY(ficom_r80_by_i16))
3353
3354
/*
 * Binary operations on 80-bit floating point that affect only EFLAGS and possibly FSW.
 */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);

/** Sub-tests walked by FpuBinaryEflR80Generate() and FpuBinaryEflR80Test(). */
static const FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
{
    ENTRY(fcomi_r80_by_r80),
    ENTRY(fucomi_r80_by_r80),
};
3365
3366	#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed 80-bit input pairs that FpuBinaryEflR80Generate() runs after its
 *  random inputs. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
3372
3373	static void FpuBinaryEflR80Generate(PRTSTREAM pOut, uint32_t cTests)
3374	{
3375	X86FXSTATE State;
3376	RT_ZERO(State);
3377	uint32_t cMinNormalPairs = cTests / 4;
3378	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3379	{
3380	GenerateArrayStart(pOut, g_aFpuBinaryEflR80[iFn].pszName, "FPU_BINARY_EFL_R80_TEST_T");
3381	uint32_t cNormalInputPairs = 0;
3382	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
3383	{
3384	RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Ex() : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
3385	RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Ex() : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
3386	if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
3387	cNormalInputPairs++;
3388	else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
3389	{
3390	iTest -= 1;
3391	continue;
3392	}
3393
3394	uint16_t const fFcw = RandFcw();
3395	State.FSW = RandFsw();
3396
3397	/* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
3398	for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
3399	{
3400	State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) \| iMask;
3401	uint16_t uFswOut = 0;
3402	uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
3403	RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %#08x }, /* #%u/%c */\n",
3404	State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal1), GenFormatR80(&InVal2), fEflOut,
3405	iTest, iMask ? 'c' : 'u');
3406	}
3407	}
3408	GenerateArrayEnd(pOut, g_aFpuBinaryEflR80[iFn].pszName);
3409	}
3410	}
#endif /* TSTIEMAIMPL_WITH_GENERATOR */
3412
3413	static void FpuBinaryEflR80Test(void)
3414	{
3415	X86FXSTATE State;
3416	RT_ZERO(State);
3417	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3418	{
3419	RTTestSub(g_hTest, g_aFpuBinaryEflR80[iFn].pszName);
3420
3421	uint32_t const cTests = *g_aFpuBinaryEflR80[iFn].pcTests;
3422	FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
3423	PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
3424	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
3425	if (!cTests) RTTestSkipped(g_hTest, "no tests");
3426	for (uint32_t iVar = 0; iVar < cVars; iVar++)
3427	{
3428	for (uint32_t iTest = 0; iTest < cTests; iTest++)
3429	{
3430	RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3431	RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3432	State.FCW = paTests[iTest].fFcw;
3433	State.FSW = paTests[iTest].fFswIn;
3434	uint16_t uFswOut = 0;
3435	uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
3436	if ( uFswOut != paTests[iTest].fFswOut
3437	\|\| fEflOut != paTests[iTest].fEflOut)
3438	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3439	"%s -> fsw=%#06x efl=%#08x\n"
3440	"%s expected %#06x %#08x %s (%s)\n",
3441	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3442	FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3443	iVar ? " " : "", uFswOut, fEflOut,
3444	iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
3445	EFlagsDiff(fEflOut, paTests[iTest].fEflOut), FormatFcw(paTests[iTest].fFcw));
3446	}
3447	pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
3448	}
3449	}
3450	}
3451
3452
3453	/*********************************************************************************************************************************
3454	* x87 FPU Unary Operations *
3455	*********************************************************************************************************************************/
3456
/*
 * Unary FPU operations on one 80-bit floating point value.
 */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);

/** Sub-tests walked by FpuUnaryR80Generate() and FpuUnaryR80Test().
 *  The generator writes an entry whose idxCpuEflFlavour is not the NATIVE
 *  value to the CPU specific stream, and only when it matches
 *  g_idxCpuEflFlavour; the ENTRY_AMD / ENTRY_INTEL pairs below presumably set
 *  that field (macros defined elsewhere - confirm there). */
static const FPU_UNARY_R80_T g_aFpuUnaryR80[] =
{
    ENTRY(fabs_r80),
    ENTRY(fchs_r80),
    ENTRY_AMD(  f2xm1_r80, 0), // C1 differs for -1m0x3fb263cc2c331e15^-2654
    ENTRY_INTEL(f2xm1_r80, 0),
    ENTRY(fsqrt_r80),
    ENTRY(frndint_r80),
    ENTRY_AMD(  fsin_r80, 0), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
    ENTRY_INTEL(fsin_r80, 0),
    ENTRY_AMD(  fcos_r80, 0), // value & C1 differences
    ENTRY_INTEL(fcos_r80, 0),
};
3475
3476	#ifdef TSTIEMAIMPL_WITH_GENERATOR
3477	static void FpuUnaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3478	{
3479	static RTFLOAT80U const s_aSpecials[] =
3480	{
3481	RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
3482	};
3483
3484	X86FXSTATE State;
3485	RT_ZERO(State);
3486	uint32_t cMinNormals = cTests / 4;
3487	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
3488	{
3489	PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
3490	PRTSTREAM pOutFn = pOut;
3491	if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3492	{
3493	if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3494	continue;
3495	pOutFn = pOutCpu;
3496	}
3497
3498	GenerateArrayStart(pOutFn, g_aFpuUnaryR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
3499	uint32_t cNormalInputs = 0;
3500	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3501	{
3502	RTFLOAT80U const InVal = iTest < cTests ? RandR80Ex() : s_aSpecials[iTest - cTests];
3503	if (RTFLOAT80U_IS_NORMAL(&InVal))
3504	cNormalInputs++;
3505	else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
3506	{
3507	iTest -= 1;
3508	continue;
3509	}
3510
3511	uint16_t const fFcw = RandFcw();
3512	State.FSW = RandFsw();
3513
3514	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3515	{
3516	for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
3517	{
3518	for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
3519	{
3520	State.FCW = (fFcw & ~(X86_FCW_RC_MASK \| X86_FCW_PC_MASK \| X86_FCW_MASK_ALL))
3521	\| (iRounding << X86_FCW_RC_SHIFT)
3522	\| (iPrecision << X86_FCW_PC_SHIFT)
3523	\| iMask;
3524	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3525	pfn(&State, &Res, &InVal);
3526	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%c */\n",
3527	State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal),
3528	GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
3529	}
3530	}
3531	}
3532	}
3533	GenerateArrayEnd(pOutFn, g_aFpuUnaryR80[iFn].pszName);
3534	}
3535	}
3536	#endif
3537
3538
3539	static void FpuUnaryR80Test(void)
3540	{
3541	X86FXSTATE State;
3542	RT_ZERO(State);
3543	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
3544	{
3545	RTTestSub(g_hTest, g_aFpuUnaryR80[iFn].pszName);
3546
3547	uint32_t const cTests = *g_aFpuUnaryR80[iFn].pcTests;
3548	FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
3549	PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
3550	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
3551	if (!cTests) RTTestSkipped(g_hTest, "no tests");
3552	for (uint32_t iVar = 0; iVar < cVars; iVar++)
3553	{
3554	for (uint32_t iTest = 0; iTest < cTests; iTest++)
3555	{
3556	RTFLOAT80U const InVal = paTests[iTest].InVal;
3557	IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3558	State.FCW = paTests[iTest].fFcw;
3559	State.FSW = paTests[iTest].fFswIn;
3560	pfn(&State, &Res, &InVal);
3561	if ( Res.FSW != paTests[iTest].fFswOut
3562	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
3563	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3564	"%s -> fsw=%#06x %s\n"
3565	"%s expected %#06x %s%s%s (%s)\n",
3566	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3567	FormatR80(&paTests[iTest].InVal),
3568	iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3569	iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
3570	FswDiff(Res.FSW, paTests[iTest].fFswOut),
3571	RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
3572	FormatFcw(paTests[iTest].fFcw) );
3573	}
3574	pfn = g_aFpuUnaryR80[iFn].pfnNative;
3575	}
3576	}
3577	}
3578
3579
/*
 * Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
 */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);

/** Sub-tests walked by FpuUnaryFswR80Generate() and FpuUnaryFswR80Test(). */
static const FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
{
    ENTRY(ftst_r80),
    ENTRY(fxam_r80),
};
3590
3591	#ifdef TSTIEMAIMPL_WITH_GENERATOR
3592	static void FpuUnaryFswR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3593	{
3594	static RTFLOAT80U const s_aSpecials[] =
3595	{
3596	RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
3597	};
3598
3599	X86FXSTATE State;
3600	RT_ZERO(State);
3601	uint32_t cMinNormals = cTests / 4;
3602	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
3603	{
3604	PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
3605	PRTSTREAM pOutFn = pOut;
3606	if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3607	{
3608	if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3609	continue;
3610	pOutFn = pOutCpu;
3611	}
3612
3613	GenerateArrayStart(pOutFn, g_aFpuUnaryFswR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
3614	uint32_t cNormalInputs = 0;
3615	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3616	{
3617	RTFLOAT80U const InVal = iTest < cTests ? RandR80Ex() : s_aSpecials[iTest - cTests];
3618	if (RTFLOAT80U_IS_NORMAL(&InVal))
3619	cNormalInputs++;
3620	else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
3621	{
3622	iTest -= 1;
3623	continue;
3624	}
3625
3626	uint16_t const fFcw = RandFcw();
3627	State.FSW = RandFsw();
3628
3629	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3630	{
3631	for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
3632	{
3633	for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
3634	{
3635	State.FCW = (fFcw & ~(X86_FCW_RC_MASK \| X86_FCW_PC_MASK \| X86_FCW_MASK_ALL))
3636	\| (iRounding << X86_FCW_RC_SHIFT)
3637	\| (iPrecision << X86_FCW_PC_SHIFT)
3638	\| iMask;
3639	uint16_t fFswOut = 0;
3640	pfn(&State, &fFswOut, &InVal);
3641	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u/%u/%u/%c */\n",
3642	State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal),
3643	iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
3644	}
3645	}
3646	}
3647	}
3648	GenerateArrayEnd(pOutFn, g_aFpuUnaryFswR80[iFn].pszName);
3649	}
3650	}
3651	#endif
3652
3653
3654	static void FpuUnaryFswR80Test(void)
3655	{
3656	X86FXSTATE State;
3657	RT_ZERO(State);
3658	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
3659	{
3660	RTTestSub(g_hTest, g_aFpuUnaryFswR80[iFn].pszName);
3661
3662	uint32_t const cTests = *g_aFpuUnaryFswR80[iFn].pcTests;
3663	FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
3664	PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
3665	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
3666	if (!cTests) RTTestSkipped(g_hTest, "no tests");
3667	for (uint32_t iVar = 0; iVar < cVars; iVar++)
3668	{
3669	for (uint32_t iTest = 0; iTest < cTests; iTest++)
3670	{
3671	RTFLOAT80U const InVal = paTests[iTest].InVal;
3672	uint16_t fFswOut = 0;
3673	State.FCW = paTests[iTest].fFcw;
3674	State.FSW = paTests[iTest].fFswIn;
3675	pfn(&State, &fFswOut, &InVal);
3676	if (fFswOut != paTests[iTest].fFswOut)
3677	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3678	"%s -> fsw=%#06x\n"
3679	"%s expected %#06x %s (%s)\n",
3680	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3681	FormatR80(&paTests[iTest].InVal),
3682	iVar ? " " : "", fFswOut,
3683	iVar ? " " : "", paTests[iTest].fFswOut,
3684	FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
3685	}
3686	pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
3687	}
3688	}
3689	}
3690
/*
 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
 */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);

/** Sub-tests walked by FpuUnaryTwoR80Generate() and FpuUnaryTwoR80Test().
 *  The generator writes an entry whose idxCpuEflFlavour is not the NATIVE
 *  value to the CPU specific stream, and only when it matches
 *  g_idxCpuEflFlavour. */
static const FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
{
    ENTRY_AMD(  fptan_r80_r80, 0), // rounding differences
    ENTRY_INTEL(fptan_r80_r80, 0),
    ENTRY(fxtract_r80_r80),
    ENTRY_AMD(  fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
    ENTRY_INTEL(fsincos_r80_r80, 0),
};
3704
3705	#ifdef TSTIEMAIMPL_WITH_GENERATOR
3706	static void FpuUnaryTwoR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3707	{
3708	static RTFLOAT80U const s_aSpecials[] =
3709	{
3710	RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
3711	};
3712
3713	X86FXSTATE State;
3714	RT_ZERO(State);
3715	uint32_t cMinNormals = cTests / 4;
3716	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
3717	{
3718	PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
3719	PRTSTREAM pOutFn = pOut;
3720	if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3721	{
3722	if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3723	continue;
3724	pOutFn = pOutCpu;
3725	}
3726
3727	GenerateArrayStart(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName, "FPU_UNARY_TWO_R80_TEST_T");
3728	uint32_t cNormalInputs = 0;
3729	for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3730	{
3731	RTFLOAT80U const InVal = iTest < cTests ? RandR80Ex() : s_aSpecials[iTest - cTests];
3732	if (RTFLOAT80U_IS_NORMAL(&InVal))
3733	cNormalInputs++;
3734	else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
3735	{
3736	iTest -= 1;
3737	continue;
3738	}
3739
3740	uint16_t const fFcw = RandFcw();
3741	State.FSW = RandFsw();
3742
3743	for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3744	{
3745	for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
3746	{
3747	for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
3748	{
3749	IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
3750	State.FCW = (fFcw & ~(X86_FCW_RC_MASK \| X86_FCW_PC_MASK \| X86_FCW_MASK_ALL))
3751	\| (iRounding << X86_FCW_RC_SHIFT)
3752	\| (iPrecision << X86_FCW_PC_SHIFT)
3753	\| iMask;
3754	pfn(&State, &Res, &InVal);
3755	RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n",
3756	State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal),
3757	GenFormatR80(&Res.r80Result1), GenFormatR80(&Res.r80Result2),
3758	iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
3759	}
3760	}
3761	}
3762	}
3763	GenerateArrayEnd(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName);
3764	}
3765	}
3766	#endif
3767
3768
3769	static void FpuUnaryTwoR80Test(void)
3770	{
3771	X86FXSTATE State;
3772	RT_ZERO(State);
3773	for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
3774	{
3775	RTTestSub(g_hTest, g_aFpuUnaryTwoR80[iFn].pszName);
3776
3777	uint32_t const cTests = *g_aFpuUnaryTwoR80[iFn].pcTests;
3778	FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
3779	PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
3780	uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
3781	if (!cTests) RTTestSkipped(g_hTest, "no tests");
3782	for (uint32_t iVar = 0; iVar < cVars; iVar++)
3783	{
3784	for (uint32_t iTest = 0; iTest < cTests; iTest++)
3785	{
3786	IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
3787	RTFLOAT80U const InVal = paTests[iTest].InVal;
3788	State.FCW = paTests[iTest].fFcw;
3789	State.FSW = paTests[iTest].fFswIn;
3790	pfn(&State, &Res, &InVal);
3791	if ( Res.FSW != paTests[iTest].fFswOut
3792	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
3793	\|\| !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
3794	RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3795	"%s -> fsw=%#06x %s %s\n"
3796	"%s expected %#06x %s %s %s%s%s (%s)\n",
3797	iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3798	FormatR80(&paTests[iTest].InVal),
3799	iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
3800	iVar ? " " : "", paTests[iTest].fFswOut,
3801	FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
3802	RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
3803	RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
3804	FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
3805	}
3806	pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
3807	}
3808	}
3809	}
3810
3811
3812
3813	int main(int argc, char **argv)
3814	{
3815	int rc = RTR3InitExe(argc, &argv, 0);
3816	if (RT_FAILURE(rc))
3817	return RTMsgInitFailure(rc);
3818
3819	/*
3820	* Determin the host CPU.
3821	* If not using the IEMAllAImpl.asm code, this will be set to Intel.
3822	*/
3823	#if (defined(RT_ARCH_X86) \|\| defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
3824	g_idxCpuEflFlavour = ASMIsAmdCpu() \|\| ASMIsHygonCpu()
3825	? IEMTARGETCPU_EFL_BEHAVIOR_AMD
3826	: IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
3827	#else
3828	g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
3829	#endif
3830
3831	/*
3832	* Parse arguments.
3833	*/
3834	enum { kModeNotSet, kModeTest, kModeGenerate }
3835	enmMode = kModeNotSet;
3836	bool fInt = true;
3837	bool fFpuLdSt = true;
3838	bool fFpuBinary1 = true;
3839	bool fFpuBinary2 = true;
3840	bool fFpuOther = true;
3841	bool fCpuData = true;
3842	bool fCommonData = true;
3843	uint32_t const cDefaultTests = 96;
3844	uint32_t cTests = cDefaultTests;
3845	RTGETOPTDEF const s_aOptions[] =
3846	{
3847	// mode:
3848	{ "--generate", 'g', RTGETOPT_REQ_NOTHING },
3849	{ "--test", 't', RTGETOPT_REQ_NOTHING },
3850	// test selection (both)
3851	{ "--all", 'a', RTGETOPT_REQ_NOTHING },
3852	{ "--none", 'z', RTGETOPT_REQ_NOTHING },
3853	{ "--zap", 'z', RTGETOPT_REQ_NOTHING },
3854	{ "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
3855	{ "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
3856	{ "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
3857	{ "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
3858	{ "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
3859	{ "--int", 'i', RTGETOPT_REQ_NOTHING },
3860	// generation parameters
3861	{ "--common", 'm', RTGETOPT_REQ_NOTHING },
3862	{ "--cpu", 'c', RTGETOPT_REQ_NOTHING },
3863	{ "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
3864	};
3865
3866	RTGETOPTSTATE State;
3867	rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
3868	AssertRCReturn(rc, RTEXITCODE_FAILURE);
3869
3870	RTGETOPTUNION ValueUnion;
3871	while ((rc = RTGetOpt(&State, &ValueUnion)))
3872	{
3873	switch (rc)
3874	{
3875	case 'g':
3876	enmMode = kModeGenerate;
3877	break;
3878	case 't':
3879	enmMode = kModeTest;
3880	break;
3881	case 'a':
3882	fCpuData = true;
3883	fCommonData = true;
3884	fInt = true;
3885	fFpuLdSt = true;
3886	fFpuBinary1 = true;
3887	fFpuBinary2 = true;
3888	fFpuOther = true;
3889	break;
3890	case 'z':
3891	fCpuData = false;
3892	fCommonData = false;
3893	fInt = false;
3894	fFpuLdSt = false;
3895	fFpuBinary1 = false;
3896	fFpuBinary2 = false;
3897	fFpuOther = false;
3898	break;
3899	case 'F':
3900	fFpuLdSt = true;
3901	break;
3902	case 'O':
3903	fFpuOther = true;
3904	break;
3905	case 'B':
3906	fFpuBinary1 = true;
3907	break;
3908	case 'P':
3909	fFpuBinary2 = true;
3910	break;
3911	case 'i':
3912	fInt = true;
3913	break;
3914	case 'm':
3915	fCommonData = true;
3916	break;
3917	case 'c':
3918	fCpuData = true;
3919	break;
3920	case 'n':
3921	cTests = ValueUnion.u32;
3922	break;
3923	case 'h':
3924	RTPrintf("usage: %s <-g\|-t> [options]\n"
3925	"\n"
3926	"Mode:\n"
3927	" -g, --generate\n"
3928	" Generate test data.\n"
3929	" -t, --test\n"
3930	" Execute tests.\n"
3931	"\n"
3932	"Test selection (both modes):\n"
3933	" -a, --all\n"
3934	" Enable all tests and generated test data. (default)\n"
3935	" -z, --zap, --none\n"
3936	" Disable all tests and test data types.\n"
3937	" -i, --int\n"
3938	" Enable non-FPU tests.\n"
3939	" -F, --fpu-ld-st\n"
3940	" Enable FPU load and store tests.\n"
3941	" -B, --fpu-binary-1\n"
3942	" Enable FPU binary 80-bit FP tests.\n"
3943	" -P, --fpu-binary-2\n"
3944	" Enable FPU binary 64- and 32-bit FP tests.\n"
3945	" -O, --fpu-other\n"
3946	" Enable other FPU tests.\n"
3947	"\n"
3948	"Generation:\n"
3949	" -m, --common\n"
3950	" Enable generating common test data.\n"
3951	" -c, --only-cpu\n"
3952	" Enable generating CPU specific test data.\n"
3953	" -n, --number-of-test <count>\n"
3954	" Number of tests to generate. Default: %u\n"
3955	, argv[0], cDefaultTests);
3956	return RTEXITCODE_SUCCESS;
3957	default:
3958	return RTGetOptPrintError(rc, &ValueUnion);
3959	}
3960	}
3961
3962	/*
3963	* Generate data?
3964	*/
3965	if (enmMode == kModeGenerate)
3966	{
3967	#ifdef TSTIEMAIMPL_WITH_GENERATOR
3968	char szCpuDesc[256] = {0};
3969	RTMpGetDescription(NIL_RTCPUID, szCpuDesc, sizeof(szCpuDesc));
3970	const char * const pszCpuType = g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD ? "Amd" : "Intel";
3971	# if defined(RT_OS_WINDOWS) \|\| defined(RT_OS_OS2)
3972	const char * const pszBitBucket = "NUL";
3973	# else
3974	const char * const pszBitBucket = "/dev/null";
3975	# endif
3976
3977	if (cTests == 0)
3978	cTests = cDefaultTests;
3979	g_cZeroDstTests = RT_MIN(cTests / 16, 32);
3980	g_cZeroSrcTests = g_cZeroDstTests * 2;
3981
3982	if (fInt)
3983	{
3984	const char *pszDataFile = fCommonData ? "tstIEMAImplDataInt.cpp" : pszBitBucket;
3985	PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
3986	const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
3987	? "tstIEMAImplDataInt-Amd.cpp" : "tstIEMAImplDataInt-Intel.cpp";
3988	PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
3989	if (!pStrmData \|\| !pStrmDataCpu)
3990	return RTEXITCODE_FAILURE;
3991
3992	BinU8Generate( pStrmData, pStrmDataCpu, cTests);
3993	BinU16Generate(pStrmData, pStrmDataCpu, cTests);
3994	BinU32Generate(pStrmData, pStrmDataCpu, cTests);
3995	BinU64Generate(pStrmData, pStrmDataCpu, cTests);
3996	ShiftDblGenerate(pStrmDataCpu, RT_MAX(cTests, 128));
3997	UnaryGenerate(pStrmData, cTests);
3998	ShiftGenerate(pStrmDataCpu, cTests);
3999	MulDivGenerate(pStrmDataCpu, cTests);
4000
4001	RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4002	GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4003	if (rcExit != RTEXITCODE_SUCCESS)
4004	return rcExit;
4005	}
4006
4007	if (fFpuLdSt)
4008	{
4009	const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuLdSt.cpp" : pszBitBucket;
4010	PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4011	const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4012	? "tstIEMAImplDataFpuLdSt-Amd.cpp" : "tstIEMAImplDataFpuLdSt-Intel.cpp";
4013	PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4014	if (!pStrmData \|\| !pStrmDataCpu)
4015	return RTEXITCODE_FAILURE;
4016
4017	FpuLdConstGenerate(pStrmData, cTests);
4018	FpuLdIntGenerate(pStrmData, cTests);
4019	FpuLdD80Generate(pStrmData, cTests);
4020	FpuStIntGenerate(pStrmData, pStrmDataCpu, cTests);
4021	FpuStD80Generate(pStrmData, cTests);
4022	uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
4023	FpuLdMemGenerate(pStrmData, cTests2);
4024	FpuStMemGenerate(pStrmData, cTests2);
4025
4026	RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4027	GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4028	if (rcExit != RTEXITCODE_SUCCESS)
4029	return rcExit;
4030	}
4031
4032	if (fFpuBinary1)
4033	{
4034	const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary1.cpp" : pszBitBucket;
4035	PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4036	const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4037	? "tstIEMAImplDataFpuBinary1-Amd.cpp" : "tstIEMAImplDataFpuBinary1-Intel.cpp";
4038	PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4039	if (!pStrmData \|\| !pStrmDataCpu)
4040	return RTEXITCODE_FAILURE;
4041
4042	FpuBinaryR80Generate(pStrmData, pStrmDataCpu, cTests);
4043	FpuBinaryFswR80Generate(pStrmData, cTests);
4044	FpuBinaryEflR80Generate(pStrmData, cTests);
4045
4046	RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4047	GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4048	if (rcExit != RTEXITCODE_SUCCESS)
4049	return rcExit;
4050	}
4051
4052	if (fFpuBinary2)
4053	{
4054	const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary2.cpp" : pszBitBucket;
4055	PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4056	const char pszDataCpuFile = pszBitBucket; /!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4057	? "tstIEMAImplDataFpuBinary2-Amd.cpp" : "tstIEMAImplDataFpuBinary2-Intel.cpp"; */
4058	PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4059	if (!pStrmData \|\| !pStrmDataCpu)
4060	return RTEXITCODE_FAILURE;
4061
4062	FpuBinaryR64Generate(pStrmData, cTests);
4063	FpuBinaryR32Generate(pStrmData, cTests);
4064	FpuBinaryI32Generate(pStrmData, cTests);
4065	FpuBinaryI16Generate(pStrmData, cTests);
4066	FpuBinaryFswR64Generate(pStrmData, cTests);
4067	FpuBinaryFswR32Generate(pStrmData, cTests);
4068	FpuBinaryFswI32Generate(pStrmData, cTests);
4069	FpuBinaryFswI16Generate(pStrmData, cTests);
4070
4071	RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4072	GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4073	if (rcExit != RTEXITCODE_SUCCESS)
4074	return rcExit;
4075	}
4076
4077	if (fFpuOther)
4078	{
4079	const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuOther.cpp" : pszBitBucket;
4080	PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4081	const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4082	? "tstIEMAImplDataFpuOther-Amd.cpp" : "tstIEMAImplDataFpuOther-Intel.cpp";
4083	PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4084	if (!pStrmData \|\| !pStrmDataCpu)
4085	return RTEXITCODE_FAILURE;
4086
4087	FpuUnaryR80Generate(pStrmData, pStrmDataCpu, cTests);
4088	FpuUnaryFswR80Generate(pStrmData, pStrmDataCpu, cTests);
4089	FpuUnaryTwoR80Generate(pStrmData, pStrmDataCpu, cTests);
4090
4091	RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4092	GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4093	if (rcExit != RTEXITCODE_SUCCESS)
4094	return rcExit;
4095	}
4096
4097	return RTEXITCODE_SUCCESS;
4098	#else
4099	return RTMsgErrorExitFailure("Test data generator not compiled in!");
4100	#endif
4101	}
4102
4103	/*
4104	* Do testing. Currrently disabled by default as data needs to be checked
4105	* on both intel and AMD systems first.
4106	*/
4107	rc = RTTestCreate("tstIEMAimpl", &g_hTest);
4108	AssertRCReturn(rc, RTEXITCODE_FAILURE);
4109	if (enmMode == kModeTest)
4110	{
4111	RTTestBanner(g_hTest);
4112
4113	/* Allocate one guarded buffer per test pointer, sized and aligned to the pointee (not the pointer); failures are recorded via RTTestFailed. */
4114	#define ALLOC_GUARDED_VAR(a_puVar) do { \
4115	rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
4116	if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
4117	} while (0)
4118	ALLOC_GUARDED_VAR(g_pu8);
4119	ALLOC_GUARDED_VAR(g_pu16);
4120	ALLOC_GUARDED_VAR(g_pu32);
4121	ALLOC_GUARDED_VAR(g_pu64);
4122	ALLOC_GUARDED_VAR(g_pu128);
4123	ALLOC_GUARDED_VAR(g_pu8Two);
4124	ALLOC_GUARDED_VAR(g_pu16Two);
4125	ALLOC_GUARDED_VAR(g_pu32Two);
4126	ALLOC_GUARDED_VAR(g_pu64Two);
4127	ALLOC_GUARDED_VAR(g_pu128Two);
4128	ALLOC_GUARDED_VAR(g_pfEfl);
4129	if (RTTestErrorCount(g_hTest) == 0)
4130	{
4131	if (fInt)
4132	{
4133	BinU8Test();
4134	BinU16Test();
4135	BinU32Test();
4136	BinU64Test();
4137	XchgTest();
4138	XaddTest();
4139	CmpXchgTest();
4140	CmpXchg8bTest();
4141	CmpXchg16bTest();
4142	ShiftDblTest();
4143	UnaryTest();
4144	ShiftTest();
4145	MulDivTest();
4146	BswapTest();
4147	}
4148
4149	if (fFpuLdSt)
4150	{
4151	FpuLoadConstTest();
4152	FpuLdMemTest();
4153	FpuLdIntTest();
4154	FpuLdD80Test();
4155	FpuStMemTest();
4156	FpuStIntTest();
4157	FpuStD80Test();
4158	}
4159
4160	if (fFpuBinary1)
4161	{
4162	FpuBinaryR80Test();
4163	FpuBinaryFswR80Test();
4164	FpuBinaryEflR80Test();
4165	}
4166
4167	if (fFpuBinary2)
4168	{
4169	FpuBinaryR64Test();
4170	FpuBinaryR32Test();
4171	FpuBinaryI32Test();
4172	FpuBinaryI16Test();
4173	FpuBinaryFswR64Test();
4174	FpuBinaryFswR32Test();
4175	FpuBinaryFswI32Test();
4176	FpuBinaryFswI16Test();
4177	}
4178
4179	if (fFpuOther)
4180	{
4181	FpuUnaryR80Test();
4182	FpuUnaryFswR80Test();
4183	FpuUnaryTwoR80Test();
4184	}
4185	}
4186	return RTTestSummaryAndDestroy(g_hTest);
4187	}
4188	return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
4189	}
4190

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@ 94439

Download in other formats: